From 565b4ea1ae101db9343eb339676c42b5c51544ca Mon Sep 17 00:00:00 2001
From: Barry Haddow <barry.haddow@gmail.com>
Date: Tue, 10 Sep 2013 08:58:45 +0100
Subject: [PATCH 01/84] Skeleton sparse reordering feature

---
 moses/FF/Factory.cpp                 |  2 +
 moses/FF/SparseReorderingFeature.cpp | 26 ++++++++++++
 moses/FF/SparseReorderingFeature.h   | 60 ++++++++++++++++++++++++++++
 3 files changed, 88 insertions(+)
 create mode 100644 moses/FF/SparseReorderingFeature.cpp
 create mode 100644 moses/FF/SparseReorderingFeature.h
diff --git a/moses/FF/Factory.cpp b/moses/FF/Factory.cpp
index 01b12d9207..3bf702d412 100644
--- a/moses/FF/Factory.cpp
+++ b/moses/FF/Factory.cpp
@@ -25,6 +25,7 @@
 #include "moses/FF/PhrasePairFeature.h"
 #include "moses/FF/PhraseLengthFeature.h"
 #include "moses/FF/DistortionScoreProducer.h"
+#include "moses/FF/SparseReorderingFeature.h"
 #include "moses/FF/WordPenaltyProducer.h"
 #include "moses/FF/InputFeature.h"
 #include "moses/FF/PhrasePenalty.h"
@@ -142,6 +143,7 @@ FeatureRegistry::FeatureRegistry()
   MOSES_FNAME(ControlRecombination);
   MOSES_FNAME(SkeletonStatelessFF);
   MOSES_FNAME(SkeletonStatefulFF);
+  MOSES_FNAME(SparseReorderingFeature);
 
 #ifdef HAVE_SYNLM
   MOSES_FNAME(SyntacticLanguageModel);
diff --git a/moses/FF/SparseReorderingFeature.cpp b/moses/FF/SparseReorderingFeature.cpp
new file mode 100644
index 0000000000..3955a5fdaa
--- /dev/null
+++ b/moses/FF/SparseReorderingFeature.cpp
@@ -0,0 +1,26 @@
+#include <iostream>
+
+#include "SparseReorderingFeature.h"
+
+using namespace std;
+
+namespace Moses
+{
+
+SparseReorderingFeature::SparseReorderingFeature(const std::string &line)
+  :StatefulFeatureFunction("StatefulFeatureFunction", line)
+{
+  cerr << "Constructing a Sparse Reordering feature" << endl;
+}
+
+FFState* SparseReorderingFeature::EvaluateChart(
+  const ChartHypothesis& /* cur_hypo */,
+  int /* featureID - used to index the state in the previous hypotheses */,
+  ScoreComponentCollection* accumulator) const
+{
+  return new SparseReorderingState();
+}
+
+
+}
+
diff --git a/moses/FF/SparseReorderingFeature.h b/moses/FF/SparseReorderingFeature.h
new file mode 100644
index 0000000000..daf137a09c
--- /dev/null
+++ b/moses/FF/SparseReorderingFeature.h
@@ -0,0 +1,60 @@
+#pragma once
+
+#include <string>
+
+#include "StatefulFeatureFunction.h"
+#include "FFState.h"
+
+namespace Moses
+{
+
+class SparseReorderingState : public FFState
+{
+public:
+	int Compare(const FFState& other) const
+	{
+		return 0;
+	}
+};
+
+class SparseReorderingFeature : public StatefulFeatureFunction
+{
+public:
+	SparseReorderingFeature(const std::string &line);
+
+	bool IsUseable(const FactorMask &mask) const
+		{ return true; }
+
+	void Evaluate(const Phrase &source
+	                        , const TargetPhrase &targetPhrase
+	                        , ScoreComponentCollection &scoreBreakdown
+	                        , ScoreComponentCollection &estimatedFutureScore) const
+	{}
+	void Evaluate(const InputType &input
+	                        , const InputPath &inputPath
+	                        , ScoreComponentCollection &scoreBreakdown) const
+	{}
+	  FFState* Evaluate(
+	    const Hypothesis& cur_hypo,
+	    const FFState* prev_state,
+	    ScoreComponentCollection* accumulator) const
+	  {
+		  return new SparseReorderingState();
+	  }
+
+	  FFState* EvaluateChart(
+	    const ChartHypothesis& /* cur_hypo */,
+	    int /* featureID - used to index the state in the previous hypotheses */,
+	    ScoreComponentCollection* accumulator) const;
+
+	  virtual const FFState* EmptyHypothesisState(const InputType &input) const
+	  {
+		  return new SparseReorderingState();
+	  }
+
+
+};
+
+
+}
+

From f7c53fef552fb1c14047dd0577ad9ade8da5acd0 Mon Sep 17 00:00:00 2001
From: Barry Haddow <barry.haddow@gmail.com>
Date: Tue, 10 Sep 2013 11:20:14 +0100
Subject: [PATCH 02/84] Set dense feature count to 0

---
 moses/FF/SparseReorderingFeature.cpp | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/moses/FF/SparseReorderingFeature.cpp b/moses/FF/SparseReorderingFeature.cpp
index 3955a5fdaa..308093617b 100644
--- a/moses/FF/SparseReorderingFeature.cpp
+++ b/moses/FF/SparseReorderingFeature.cpp
@@ -1,5 +1,7 @@
 #include <iostream>
 
+#include "moses/ChartHypothesis.h"
+
 #include "SparseReorderingFeature.h"
 
 using namespace std;
@@ -8,16 +10,20 @@ namespace Moses
 {
 
 SparseReorderingFeature::SparseReorderingFeature(const std::string &line)
-  :StatefulFeatureFunction("StatefulFeatureFunction", line)
+  :StatefulFeatureFunction("StatefulFeatureFunction",0, line)
 {
   cerr << "Constructing a Sparse Reordering feature" << endl;
 }
 
 FFState* SparseReorderingFeature::EvaluateChart(
-  const ChartHypothesis& /* cur_hypo */,
-  int /* featureID - used to index the state in the previous hypotheses */,
+  const ChartHypothesis&  cur_hypo ,
+  int  featureID /*- used to index the state in the previous hypotheses */,
   ScoreComponentCollection* accumulator) const
 {
+  // get index map for underlying hypotheses
+  const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
+    cur_hypo.GetCurrTargetPhrase().GetAlignNonTerm().GetNonTermIndexMap();
+
   return new SparseReorderingState();
 }
 

From bd7f9344b8c2642b9ab88196e245cc8e6367e41d Mon Sep 17 00:00:00 2001
From: Barry Haddow <barry.haddow@gmail.com>
Date: Wed, 11 Sep 2013 22:10:23 +0100
Subject: [PATCH 03/84] Pairs of non-terminals

---
 moses/FF/SparseReorderingFeature.cpp | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/moses/FF/SparseReorderingFeature.cpp b/moses/FF/SparseReorderingFeature.cpp
index 308093617b..1a4becba6a 100644
--- a/moses/FF/SparseReorderingFeature.cpp
+++ b/moses/FF/SparseReorderingFeature.cpp
@@ -23,6 +23,26 @@ FFState* SparseReorderingFeature::EvaluateChart(
   // get index map for underlying hypotheses
   const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
     cur_hypo.GetCurrTargetPhrase().GetAlignNonTerm().GetNonTermIndexMap();
+  
+  //Find all the pairs of non-terminals
+  //Are they forward or reversed relative to each other?
+  //Add features for their boundary words
+
+  //Get mapping from target to source, in target order
+  vector<pair<size_t, size_t> > targetNTs; //(srcIdx,targetPos)
+  for (size_t targetIdx = 0; targetIdx < nonTermIndexMap.size(); ++targetIdx) {
+    size_t srcNTIdx;
+    if ((srcNTIdx = nonTermIndexMap[targetIdx]) == NOT_FOUND) continue;
+    targetNTs.push_back(pair<size_t,size_t> (srcNTIdx,targetIdx));
+  }
+  for (size_t i = 0; i < targetNTs.size(); ++i) {
+    for (size_t j = i+1; j < targetNTs.size(); ++j) {
+      size_t src1 = targetNTs[i].first;
+      size_t src2 = targetNTs[j].first;
+      //NT pair (src1,src2) maps to (i,j)
+      cerr << src1 << " -> " << i << " , " << src2 << " -> " << j << endl; 
+    }
+  }
 
   return new SparseReorderingState();
 }

From 7047b4e197de8fb70c56dcbf00fe19cdf67aea0b Mon Sep 17 00:00:00 2001
From: Barry Haddow <barry.haddow@gmail.com>
Date: Thu, 12 Sep 2013 13:39:59 +0100
Subject: [PATCH 04/84] Sparse reordering for non-terminal pairs

---
 moses/FF/SparseReorderingFeature.cpp | 42 +++++++++++++++++++++++++++-
 moses/FF/SparseReorderingFeature.h   |  5 ++++
 2 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/moses/FF/SparseReorderingFeature.cpp b/moses/FF/SparseReorderingFeature.cpp
index 1a4becba6a..16e175fff5 100644
--- a/moses/FF/SparseReorderingFeature.cpp
+++ b/moses/FF/SparseReorderingFeature.cpp
@@ -1,6 +1,8 @@
 #include <iostream>
 
 #include "moses/ChartHypothesis.h"
+#include "moses/ChartManager.h"
+#include "moses/Sentence.h"
 
 #include "SparseReorderingFeature.h"
 
@@ -15,6 +17,34 @@ SparseReorderingFeature::SparseReorderingFeature(const std::string &line)
   cerr << "Constructing a Sparse Reordering feature" << endl;
 }
 
+static void AddFeatureWordPair(const string& prefix, const string& suffix,
+  const Word& word1, const Word& word2, ScoreComponentCollection* accumulator, FactorType factor = 0) {
+  stringstream buf;
+  buf << prefix << word1[factor]->GetString() << "_" << word2[factor]->GetString() << suffix;
+  accumulator->SparsePlusEquals(buf.str(), 1);
+}
+  
+
+void SparseReorderingFeature::AddNonTerminalPairFeatures(
+  const Sentence& sentence, const WordsRange& nt1, const WordsRange& nt2,
+    bool isMonotone, ScoreComponentCollection* accumulator) const {
+  //TODO: remove string concatenation
+  const static string monotone = "_M";
+  const static string swap = "_S";
+  const static string prefixes[] = 
+    { "srf_slslw_", "srf_slsrw_", "srf_srslw_", "srf_srsrw_"};
+
+  string direction = isMonotone ? monotone : swap;
+  AddFeatureWordPair(prefixes[0], direction,
+     sentence.GetWord(nt1.GetStartPos()), sentence.GetWord(nt2.GetStartPos()), accumulator);
+  AddFeatureWordPair(prefixes[1], direction,
+     sentence.GetWord(nt1.GetStartPos()), sentence.GetWord(nt2.GetEndPos()),  accumulator);
+  AddFeatureWordPair(prefixes[2], direction,
+     sentence.GetWord(nt1.GetEndPos()), sentence.GetWord(nt2.GetStartPos()), accumulator);
+  AddFeatureWordPair(prefixes[3], direction,
+     sentence.GetWord(nt1.GetEndPos()), sentence.GetWord(nt2.GetStartPos()), accumulator);
+}
+
 FFState* SparseReorderingFeature::EvaluateChart(
   const ChartHypothesis&  cur_hypo ,
   int  featureID /*- used to index the state in the previous hypotheses */,
@@ -35,12 +65,22 @@ FFState* SparseReorderingFeature::EvaluateChart(
     if ((srcNTIdx = nonTermIndexMap[targetIdx]) == NOT_FOUND) continue;
     targetNTs.push_back(pair<size_t,size_t> (srcNTIdx,targetIdx));
   }
+  //Add features for pairs of non-terminals
   for (size_t i = 0; i < targetNTs.size(); ++i) {
     for (size_t j = i+1; j < targetNTs.size(); ++j) {
       size_t src1 = targetNTs[i].first;
       size_t src2 = targetNTs[j].first;
       //NT pair (src1,src2) maps to (i,j)
-      cerr << src1 << " -> " << i << " , " << src2 << " -> " << j << endl; 
+      bool isMonotone = true;
+      if ((src1 < src2 && i > j) || (src1 > src2 && i < j)) isMonotone = false;
+      //NB: should throw bad_cast for Lattice input
+      const Sentence& sentence = 
+        dynamic_cast<const Sentence&>(cur_hypo.GetManager().GetSource());
+      AddNonTerminalPairFeatures(sentence,
+        cur_hypo.GetPrevHypo(src1)->GetCurrSourceRange(),
+        cur_hypo.GetPrevHypo(src2)->GetCurrSourceRange(),
+        isMonotone,
+        accumulator);
     }
   }
 
diff --git a/moses/FF/SparseReorderingFeature.h b/moses/FF/SparseReorderingFeature.h
index daf137a09c..73f1670cf1 100644
--- a/moses/FF/SparseReorderingFeature.h
+++ b/moses/FF/SparseReorderingFeature.h
@@ -52,6 +52,11 @@ class SparseReorderingFeature : public StatefulFeatureFunction
 		  return new SparseReorderingState();
 	  }
 
+private:
+
+void AddNonTerminalPairFeatures(
+  const Sentence& sentence, const WordsRange& nt1, const WordsRange& nt2,
+    bool isMonotone, ScoreComponentCollection* accumulator) const;
 
 };
 

From 3dff33069499d817af145ab9e6585022ef912df3 Mon Sep 17 00:00:00 2001
From: Barry Haddow <barry.haddow@gmail.com>
Date: Thu, 12 Sep 2013 18:55:10 +0100
Subject: [PATCH 05/84] Configuration

---
 moses/FF/SparseReorderingFeature.cpp | 54 +++++++++++++++++++++++++---
 moses/FF/SparseReorderingFeature.h   | 20 +++++++++--
 2 files changed, 67 insertions(+), 7 deletions(-)

diff --git a/moses/FF/SparseReorderingFeature.cpp b/moses/FF/SparseReorderingFeature.cpp
index 16e175fff5..6127fde414 100644
--- a/moses/FF/SparseReorderingFeature.cpp
+++ b/moses/FF/SparseReorderingFeature.cpp
@@ -4,6 +4,8 @@
 #include "moses/ChartManager.h"
 #include "moses/Sentence.h"
 
+#include "util/exception.hh"
+
 #include "SparseReorderingFeature.h"
 
 using namespace std;
@@ -12,9 +14,47 @@ namespace Moses
 {
 
 SparseReorderingFeature::SparseReorderingFeature(const std::string &line)
-  :StatefulFeatureFunction("StatefulFeatureFunction",0, line)
+  :StatefulFeatureFunction("StatefulFeatureFunction",0, line),
+  m_sourceFactor(0),
+  m_targetFactor(0),
+  m_sourceVocabFile(""),
+  m_targetVocabFile("")
 {
+
+  /*
+    Configuration of features.
+      factor - Which factor should it apply to
+      type - what type of sparse reordering feature. e.g. block (modelled on Matthias
+        Huck's EAMT 2012 features)
+      word - which words to include, e.g. src_bdry, src_all, tgt_bdry , ...
+      vocab - vocab file to limit it to
+      orientation - e.g. lr, etc.
+  */
   cerr << "Constructing a Sparse Reordering feature" << endl;
+  ReadParameters();
+  LoadVocabulary(m_sourceVocabFile, m_sourceVocab);
+  LoadVocabulary(m_targetVocabFile, m_targetVocab);
+}
+
+void SparseReorderingFeature::SetParameter(const std::string& key, const std::string& value) {
+  if (key == "input-factor") {
+    m_sourceFactor = Scan<FactorType>(value);
+  } else if (key == "output-factor") {
+    m_targetFactor = Scan<FactorType>(value);
+  } else if (key == "input-vocab-file") {
+    m_sourceVocabFile = value;
+  } else if (key == "output-vocab-file") {
+    m_targetVocabFile = value;
+  } else {
+    FeatureFunction::SetParameter(key, value);
+  }
+}
+
+void SparseReorderingFeature::LoadVocabulary(const std::string& filename, boost::unordered_set<std::string>& vocab)
+{
+  if (filename.empty()) return;
+  ifstream in(filename.c_str());
+  UTIL_THROW_IF(!in, util::Exception, "Unable to open vocab file: " << filename);
 }
 
 static void AddFeatureWordPair(const string& prefix, const string& suffix,
@@ -54,9 +94,15 @@ FFState* SparseReorderingFeature::EvaluateChart(
   const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
     cur_hypo.GetCurrTargetPhrase().GetAlignNonTerm().GetNonTermIndexMap();
   
-  //Find all the pairs of non-terminals
-  //Are they forward or reversed relative to each other?
-  //Add features for their boundary words
+  //The Huck features. For a rule with source side:
+  //   abXcdXef
+  //We first have to split into blocks:
+  // ab X cd X ef
+  //Then we extract features based in the boundary words of the neighbouring blocks
+  //For the block pair, we use the right word of the left block, and the left 
+  //word of the right block.
+
+  WordsRange sourceRange = cur_hypo.GetCurrSourceRange();
 
   //Get mapping from target to source, in target order
   vector<pair<size_t, size_t> > targetNTs; //(srcIdx,targetPos)
diff --git a/moses/FF/SparseReorderingFeature.h b/moses/FF/SparseReorderingFeature.h
index 73f1670cf1..408b00b342 100644
--- a/moses/FF/SparseReorderingFeature.h
+++ b/moses/FF/SparseReorderingFeature.h
@@ -2,6 +2,8 @@
 
 #include <string>
 
+#include <boost/unordered_set.hpp>
+
 #include "StatefulFeatureFunction.h"
 #include "FFState.h"
 
@@ -25,6 +27,8 @@ class SparseReorderingFeature : public StatefulFeatureFunction
 	bool IsUseable(const FactorMask &mask) const
 		{ return true; }
 
+  void SetParameter(const std::string& key, const std::string& value);
+
 	void Evaluate(const Phrase &source
 	                        , const TargetPhrase &targetPhrase
 	                        , ScoreComponentCollection &scoreBreakdown
@@ -54,9 +58,19 @@ class SparseReorderingFeature : public StatefulFeatureFunction
 
 private:
 
-void AddNonTerminalPairFeatures(
-  const Sentence& sentence, const WordsRange& nt1, const WordsRange& nt2,
-    bool isMonotone, ScoreComponentCollection* accumulator) const;
+  void AddNonTerminalPairFeatures(
+    const Sentence& sentence, const WordsRange& nt1, const WordsRange& nt2,
+      bool isMonotone, ScoreComponentCollection* accumulator) const;
+
+  void LoadVocabulary(const std::string& filename, boost::unordered_set<std::string>& vocab);
+
+  FactorType m_sourceFactor;
+  FactorType m_targetFactor;
+  std::string m_sourceVocabFile;
+  std::string m_targetVocabFile;
+
+  boost::unordered_set<std::string> m_sourceVocab;
+  boost::unordered_set<std::string> m_targetVocab;
 
 };
 

From 8651f8da894c45c83d946745c599c04067811be2 Mon Sep 17 00:00:00 2001
From: Barry Haddow <barry.haddow@gmail.com>
Date: Fri, 13 Sep 2013 08:48:44 +0100
Subject: [PATCH 06/84] Extract blocks

---
 moses/FF/SparseReorderingFeature.cpp | 45 +++++++++++++++++++++++++++-
 1 file changed, 44 insertions(+), 1 deletion(-)

diff --git a/moses/FF/SparseReorderingFeature.cpp b/moses/FF/SparseReorderingFeature.cpp
index 6127fde414..3e955bce26 100644
--- a/moses/FF/SparseReorderingFeature.cpp
+++ b/moses/FF/SparseReorderingFeature.cpp
@@ -102,7 +102,50 @@ FFState* SparseReorderingFeature::EvaluateChart(
   //For the block pair, we use the right word of the left block, and the left 
   //word of the right block.
 
-  WordsRange sourceRange = cur_hypo.GetCurrSourceRange();
+  //Need to get blocks, and their alignment. Each block has a word range (on the 
+  // on the source), a non-terminal flag, and  a set of alignment points in the target phrase
+
+  vector<WordsRange> sourceNTSpans;
+  for (size_t prevHypoId = 0; prevHypoId < cur_hypo.GetPrevHypos().size(); ++prevHypoId) {
+    sourceNTSpans.push_back(cur_hypo.GetPrevHypo(prevHypoId)->GetCurrSourceRange());
+  }
+  sort(sourceNTSpans.begin(), sourceNTSpans.end()); //put in source order
+  cerr << "Source NTs: ";
+  for (size_t i = 0; i < sourceNTSpans.size(); ++i) cerr << sourceNTSpans[i] << " ";
+  cerr << endl;
+
+  vector<WordsRange> blocks;
+  blocks.push_back(cur_hypo.GetCurrSourceRange());
+  for (vector<WordsRange>::const_iterator i = sourceNTSpans.begin(); 
+      i != sourceNTSpans.end(); ++i) {
+    const WordsRange& prevHypoRange = *i;
+    WordsRange lastRange = blocks.back();
+    blocks.pop_back();
+    //split this range into before NT, NT and after NT
+    if (prevHypoRange.GetStartPos() > lastRange.GetStartPos()) {
+      blocks.push_back(WordsRange(lastRange.GetStartPos(),prevHypoRange.GetStartPos()-1));
+    }
+    blocks.push_back(prevHypoRange);
+    if (prevHypoRange.GetEndPos() < lastRange.GetEndPos()) {
+      blocks.push_back(WordsRange(prevHypoRange.GetEndPos()+1,lastRange.GetEndPos()));
+    }
+  }
+  cerr << "Blocks: ";
+  for (size_t i = 0; i < blocks.size(); ++i) cerr << blocks[i] << " ";
+  cerr << endl;
+
+  //this currently doesn't work
+  const InputPath* inputPath = cur_hypo.GetTranslationOption().GetInputPath();
+  //The phrase is always dangling
+  //cerr << "IP: phrase " << inputPath << endl;
+  /*
+  cerr << "NTs ";
+  for (NonTerminalSet::const_iterator i = inputPath->GetNonTerminalSet().begin();
+    i != inputPath->GetNonTerminalSet().end(); ++i) {
+    cerr << *i << " ";
+  }
+  cerr << endl;
+  */
 
   //Get mapping from target to source, in target order
   vector<pair<size_t, size_t> > targetNTs; //(srcIdx,targetPos)

From 9d874b0ac13da8e9cd2f62942df9dea7d7a8b3df Mon Sep 17 00:00:00 2001
From: Barry Haddow <barry.haddow@gmail.com>
Date: Fri, 13 Sep 2013 14:44:30 +0100
Subject: [PATCH 07/84] Prints out feature values

---
 moses/FF/SparseReorderingFeature.cpp | 134 ++++++++++++++++++++++-----
 1 file changed, 112 insertions(+), 22 deletions(-)

diff --git a/moses/FF/SparseReorderingFeature.cpp b/moses/FF/SparseReorderingFeature.cpp
index 3e955bce26..1fcd5e3b0c 100644
--- a/moses/FF/SparseReorderingFeature.cpp
+++ b/moses/FF/SparseReorderingFeature.cpp
@@ -91,8 +91,8 @@ FFState* SparseReorderingFeature::EvaluateChart(
   ScoreComponentCollection* accumulator) const
 {
   // get index map for underlying hypotheses
-  const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
-    cur_hypo.GetCurrTargetPhrase().GetAlignNonTerm().GetNonTermIndexMap();
+  //const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
+  //  cur_hypo.GetCurrTargetPhrase().GetAlignNonTerm().GetNonTermIndexMap();
   
   //The Huck features. For a rule with source side:
   //   abXcdXef
@@ -105,48 +105,138 @@ FFState* SparseReorderingFeature::EvaluateChart(
   //Need to get blocks, and their alignment. Each block has a word range (on the 
   // on the source), a non-terminal flag, and  a set of alignment points in the target phrase
 
+  //We need to be able to map source word position to target word position, as
+  //much as possible (don't need interior of non-terminals). The alignment info
+  //objects just give us the mappings between *rule* positions. So if we can 
+  //map source word position to source rule position, and target rule position
+  //to target word position, then we can map right through.
+
+  size_t sourceStart = cur_hypo.GetCurrSourceRange().GetStartPos();
+  size_t sourceSize = cur_hypo.GetCurrSourceRange().GetNumWordsCovered();
+
   vector<WordsRange> sourceNTSpans;
   for (size_t prevHypoId = 0; prevHypoId < cur_hypo.GetPrevHypos().size(); ++prevHypoId) {
     sourceNTSpans.push_back(cur_hypo.GetPrevHypo(prevHypoId)->GetCurrSourceRange());
   }
-  sort(sourceNTSpans.begin(), sourceNTSpans.end()); //put in source order
-  cerr << "Source NTs: ";
-  for (size_t i = 0; i < sourceNTSpans.size(); ++i) cerr << sourceNTSpans[i] << " ";
-  cerr << endl;
+  //put in source order. Is this necessary?
+  sort(sourceNTSpans.begin(), sourceNTSpans.end()); 
+  //cerr << "Source NTs: ";
+  //for (size_t i = 0; i < sourceNTSpans.size(); ++i) cerr << sourceNTSpans[i] << " ";
+  //cerr << endl;
 
-  vector<WordsRange> blocks;
-  blocks.push_back(cur_hypo.GetCurrSourceRange());
+  typedef pair<WordsRange,bool> Block;//flag indicates NT
+  vector<Block> sourceBlocks; 
+  sourceBlocks.push_back(Block(cur_hypo.GetCurrSourceRange(),false));
   for (vector<WordsRange>::const_iterator i = sourceNTSpans.begin(); 
       i != sourceNTSpans.end(); ++i) {
     const WordsRange& prevHypoRange = *i;
-    WordsRange lastRange = blocks.back();
-    blocks.pop_back();
+    Block lastBlock = sourceBlocks.back();
+    sourceBlocks.pop_back();
     //split this range into before NT, NT and after NT
-    if (prevHypoRange.GetStartPos() > lastRange.GetStartPos()) {
-      blocks.push_back(WordsRange(lastRange.GetStartPos(),prevHypoRange.GetStartPos()-1));
+    if (prevHypoRange.GetStartPos() > lastBlock.first.GetStartPos()) {
+      sourceBlocks.push_back(Block(WordsRange(lastBlock.first.GetStartPos(),prevHypoRange.GetStartPos()-1),false));
     }
-    blocks.push_back(prevHypoRange);
-    if (prevHypoRange.GetEndPos() < lastRange.GetEndPos()) {
-      blocks.push_back(WordsRange(prevHypoRange.GetEndPos()+1,lastRange.GetEndPos()));
+    sourceBlocks.push_back(Block(prevHypoRange,true));
+    if (prevHypoRange.GetEndPos() < lastBlock.first.GetEndPos()) {
+      sourceBlocks.push_back(Block(WordsRange(prevHypoRange.GetEndPos()+1,lastBlock.first.GetEndPos()), false));
     }
   }
-  cerr << "Blocks: ";
-  for (size_t i = 0; i < blocks.size(); ++i) cerr << blocks[i] << " ";
+  cerr << "Source Blocks: ";
+  for (size_t i = 0; i < sourceBlocks.size(); ++i) cerr << sourceBlocks[i].first << " "
+      << (sourceBlocks[i].second ? "NT" : "T") << " ";
   cerr << endl;
 
-  //this currently doesn't work
+  //Mapping from source word to target rule position
+  vector<size_t> sourceWordToTargetRulePos(sourceSize);
+  map<size_t,size_t> alignMap;
+  alignMap.insert(
+    cur_hypo.GetCurrTargetPhrase().GetAlignTerm().begin(),
+    cur_hypo.GetCurrTargetPhrase().GetAlignTerm().end());
+  alignMap.insert(
+    cur_hypo.GetCurrTargetPhrase().GetAlignNonTerm().begin(),
+    cur_hypo.GetCurrTargetPhrase().GetAlignNonTerm().end());
+  //vector<size_t> alignMapTerm = cur_hypo.GetCurrTargetPhrase().GetAlignNonTerm()
+  size_t sourceRulePos = 0;
+  //cerr << "SW->RP ";
+  for (vector<Block>::const_iterator sourceBlockIt = sourceBlocks.begin(); 
+    sourceBlockIt != sourceBlocks.end(); ++sourceBlockIt) {
+    for (size_t sourceWordPos = sourceBlockIt->first.GetStartPos();
+      sourceWordPos <= sourceBlockIt->first.GetEndPos(); ++sourceWordPos) {
+      sourceWordToTargetRulePos[sourceWordPos - sourceStart] = alignMap[sourceRulePos];
+   //   cerr << sourceWordPos - sourceStart << "-" << alignMap[sourceRulePos] << " ";
+      if (! sourceBlockIt->second) {
+        //T
+        ++sourceRulePos;
+      }
+    }
+    if ( sourceBlockIt->second) {
+      //NT
+      ++sourceRulePos;
+    }
+  }
+  //cerr << endl;
+
+  /**
   const InputPath* inputPath = cur_hypo.GetTranslationOption().GetInputPath();
-  //The phrase is always dangling
-  //cerr << "IP: phrase " << inputPath << endl;
-  /*
+  cerr << "IP phrase: " << inputPath->GetPhrase() << endl;
   cerr << "NTs ";
   for (NonTerminalSet::const_iterator i = inputPath->GetNonTerminalSet().begin();
     i != inputPath->GetNonTerminalSet().end(); ++i) {
     cerr << *i << " ";
   }
   cerr << endl;
+  **/
+  //Iterate through block pairs
+  const Sentence& sentence = 
+    dynamic_cast<const Sentence&>(cur_hypo.GetManager().GetSource());
+  //const TargetPhrase& targetPhrase = cur_hypo.GetCurrTargetPhrase();
+  for (size_t i = 0; i < sourceBlocks.size()-1; ++i) {
+    Block& leftSourceBlock = sourceBlocks[i];
+    Block& rightSourceBlock = sourceBlocks[i+1];
+    size_t sourceLeftBoundaryPos = leftSourceBlock.first.GetEndPos();
+    size_t sourceRightBoundaryPos = rightSourceBlock.first.GetStartPos();
+    const Word& sourceLeftBoundaryWord = sentence.GetWord(sourceLeftBoundaryPos);
+    const Word& sourceRightBoundaryWord = sentence.GetWord(sourceRightBoundaryPos);
+    sourceLeftBoundaryPos -= sourceStart;
+    sourceRightBoundaryPos -= sourceStart;
+    
+    // Need to figure out where these map to on the target.
+    size_t targetLeftRulePos = 
+      sourceWordToTargetRulePos[sourceLeftBoundaryPos];
+    size_t targetRightRulePos = 
+      sourceWordToTargetRulePos[sourceRightBoundaryPos];
+
+    bool isMonotone = true;
+    if ((sourceLeftBoundaryPos < sourceRightBoundaryPos  &&
+          targetLeftRulePos > targetRightRulePos) ||
+      ((sourceLeftBoundaryPos > sourceRightBoundaryPos  &&
+          targetLeftRulePos < targetRightRulePos)))
+    {
+      isMonotone = false;
+    }
+    cerr << sourceLeftBoundaryWord.GetFactor(0)->GetString() <<
+      "_" << sourceRightBoundaryWord.GetFactor(0)->GetString() << "_" 
+      <<  (isMonotone ? "M" : "S") << endl;
+  }
+  cerr << endl;
+
+  /*
+  cerr << "NT align ";
+  const AlignmentInfo& align = cur_hypo.GetCurrTargetPhrase().GetAlignNonTerm();
+  for (AlignmentInfo::CollType::const_iterator i = align.begin(); i != align.end(); ++i) {
+    cerr << i->first << "," << i->second << " ";
+  }
+  cerr << endl;
+
+  cerr << "T align ";
+  const AlignmentInfo& alignT = cur_hypo.GetCurrTargetPhrase().GetAlignTerm();
+  for (AlignmentInfo::CollType::const_iterator i = alignT.begin(); i != alignT.end(); ++i) {
+    cerr << i->first << "," << i->second << " ";
+  }
+  cerr << endl;
   */
 
+  /*
   //Get mapping from target to source, in target order
   vector<pair<size_t, size_t> > targetNTs; //(srcIdx,targetPos)
   for (size_t targetIdx = 0; targetIdx < nonTermIndexMap.size(); ++targetIdx) {
@@ -171,7 +261,7 @@ FFState* SparseReorderingFeature::EvaluateChart(
         isMonotone,
         accumulator);
     }
-  }
+  }*/
 
   return new SparseReorderingState();
 }

From 82369968b5ea9b18086b04e9935da72fd484a4d3 Mon Sep 17 00:00:00 2001
From: Barry Haddow <barry.haddow@gmail.com>
Date: Fri, 13 Sep 2013 16:52:42 +0100
Subject: [PATCH 08/84] vocabulary, type configuration

---
 moses/FF/SparseReorderingFeature.cpp | 123 +++++++++------------------
 moses/FF/SparseReorderingFeature.h   |  20 ++++-
 2 files changed, 55 insertions(+), 88 deletions(-)

diff --git a/moses/FF/SparseReorderingFeature.cpp b/moses/FF/SparseReorderingFeature.cpp
index 1fcd5e3b0c..63e7dddc90 100644
--- a/moses/FF/SparseReorderingFeature.cpp
+++ b/moses/FF/SparseReorderingFeature.cpp
@@ -2,6 +2,7 @@
 
 #include "moses/ChartHypothesis.h"
 #include "moses/ChartManager.h"
+#include "moses/FactorCollection.h"
 #include "moses/Sentence.h"
 
 #include "util/exception.hh"
@@ -15,6 +16,7 @@ namespace Moses
 
 SparseReorderingFeature::SparseReorderingFeature(const std::string &line)
   :StatefulFeatureFunction("StatefulFeatureFunction",0, line),
+  m_type(SourceCombined),
   m_sourceFactor(0),
   m_targetFactor(0),
   m_sourceVocabFile(""),
@@ -32,6 +34,7 @@ SparseReorderingFeature::SparseReorderingFeature(const std::string &line)
   */
   cerr << "Constructing a Sparse Reordering feature" << endl;
   ReadParameters();
+  m_otherFactor = FactorCollection::Instance().AddFactor("##OTHER##");
   LoadVocabulary(m_sourceVocabFile, m_sourceVocab);
   LoadVocabulary(m_targetVocabFile, m_targetVocab);
 }
@@ -45,44 +48,37 @@ void SparseReorderingFeature::SetParameter(const std::string& key, const std::st
     m_sourceVocabFile = value;
   } else if (key == "output-vocab-file") {
     m_targetVocabFile = value;
+  } else if (key == "type") {
+    if (value == "SourceCombined") {
+      m_type = SourceCombined;
+    } else if (value == "SourceLeft") {
+      m_type = SourceLeft;
+    } else if (value == "SourceRight") {
+      m_type = SourceRight;
+    } else {
+      UTIL_THROW(util::Exception, "Unknown sparse reordering type " << value);
+    }
   } else {
     FeatureFunction::SetParameter(key, value);
   }
 }
 
-void SparseReorderingFeature::LoadVocabulary(const std::string& filename, boost::unordered_set<std::string>& vocab)
+void SparseReorderingFeature::LoadVocabulary(const std::string& filename, Vocab& vocab)
 {
   if (filename.empty()) return;
   ifstream in(filename.c_str());
   UTIL_THROW_IF(!in, util::Exception, "Unable to open vocab file: " << filename);
+  string line;
+  while(getline(in,line)) {
+    vocab.insert(FactorCollection::Instance().AddFactor(line)); 
+  }
+  in.close();
 }
 
-static void AddFeatureWordPair(const string& prefix, const string& suffix,
-  const Word& word1, const Word& word2, ScoreComponentCollection* accumulator, FactorType factor = 0) {
-  stringstream buf;
-  buf << prefix << word1[factor]->GetString() << "_" << word2[factor]->GetString() << suffix;
-  accumulator->SparsePlusEquals(buf.str(), 1);
-}
-  
-
-void SparseReorderingFeature::AddNonTerminalPairFeatures(
-  const Sentence& sentence, const WordsRange& nt1, const WordsRange& nt2,
-    bool isMonotone, ScoreComponentCollection* accumulator) const {
-  //TODO: remove string concatenation
-  const static string monotone = "_M";
-  const static string swap = "_S";
-  const static string prefixes[] = 
-    { "srf_slslw_", "srf_slsrw_", "srf_srslw_", "srf_srsrw_"};
-
-  string direction = isMonotone ? monotone : swap;
-  AddFeatureWordPair(prefixes[0], direction,
-     sentence.GetWord(nt1.GetStartPos()), sentence.GetWord(nt2.GetStartPos()), accumulator);
-  AddFeatureWordPair(prefixes[1], direction,
-     sentence.GetWord(nt1.GetStartPos()), sentence.GetWord(nt2.GetEndPos()),  accumulator);
-  AddFeatureWordPair(prefixes[2], direction,
-     sentence.GetWord(nt1.GetEndPos()), sentence.GetWord(nt2.GetStartPos()), accumulator);
-  AddFeatureWordPair(prefixes[3], direction,
-     sentence.GetWord(nt1.GetEndPos()), sentence.GetWord(nt2.GetStartPos()), accumulator);
+const Factor* SparseReorderingFeature::GetFactor(const Word& word, const Vocab& vocab, FactorType factorType) const {
+  const Factor* factor = word.GetFactor(factorType);
+  if (vocab.size() && vocab.find(factor) == vocab.end()) return m_otherFactor;
+  return factor;
 }
 
 FFState* SparseReorderingFeature::EvaluateChart(
@@ -141,10 +137,12 @@ FFState* SparseReorderingFeature::EvaluateChart(
       sourceBlocks.push_back(Block(WordsRange(prevHypoRange.GetEndPos()+1,lastBlock.first.GetEndPos()), false));
     }
   }
+  /*
   cerr << "Source Blocks: ";
   for (size_t i = 0; i < sourceBlocks.size(); ++i) cerr << sourceBlocks[i].first << " "
       << (sourceBlocks[i].second ? "NT" : "T") << " ";
   cerr << endl;
+  */
 
   //Mapping from source word to target rule position
   vector<size_t> sourceWordToTargetRulePos(sourceSize);
@@ -176,16 +174,6 @@ FFState* SparseReorderingFeature::EvaluateChart(
   }
   //cerr << endl;
 
-  /**
-  const InputPath* inputPath = cur_hypo.GetTranslationOption().GetInputPath();
-  cerr << "IP phrase: " << inputPath->GetPhrase() << endl;
-  cerr << "NTs ";
-  for (NonTerminalSet::const_iterator i = inputPath->GetNonTerminalSet().begin();
-    i != inputPath->GetNonTerminalSet().end(); ++i) {
-    cerr << *i << " ";
-  }
-  cerr << endl;
-  **/
   //Iterate through block pairs
   const Sentence& sentence = 
     dynamic_cast<const Sentence&>(cur_hypo.GetManager().GetSource());
@@ -214,55 +202,20 @@ FFState* SparseReorderingFeature::EvaluateChart(
     {
       isMonotone = false;
     }
-    cerr << sourceLeftBoundaryWord.GetFactor(0)->GetString() <<
-      "_" << sourceRightBoundaryWord.GetFactor(0)->GetString() << "_" 
-      <<  (isMonotone ? "M" : "S") << endl;
-  }
-  cerr << endl;
-
-  /*
-  cerr << "NT align ";
-  const AlignmentInfo& align = cur_hypo.GetCurrTargetPhrase().GetAlignNonTerm();
-  for (AlignmentInfo::CollType::const_iterator i = align.begin(); i != align.end(); ++i) {
-    cerr << i->first << "," << i->second << " ";
-  }
-  cerr << endl;
-
-  cerr << "T align ";
-  const AlignmentInfo& alignT = cur_hypo.GetCurrTargetPhrase().GetAlignTerm();
-  for (AlignmentInfo::CollType::const_iterator i = alignT.begin(); i != alignT.end(); ++i) {
-    cerr << i->first << "," << i->second << " ";
-  }
-  cerr << endl;
-  */
-
-  /*
-  //Get mapping from target to source, in target order
-  vector<pair<size_t, size_t> > targetNTs; //(srcIdx,targetPos)
-  for (size_t targetIdx = 0; targetIdx < nonTermIndexMap.size(); ++targetIdx) {
-    size_t srcNTIdx;
-    if ((srcNTIdx = nonTermIndexMap[targetIdx]) == NOT_FOUND) continue;
-    targetNTs.push_back(pair<size_t,size_t> (srcNTIdx,targetIdx));
-  }
-  //Add features for pairs of non-terminals
-  for (size_t i = 0; i < targetNTs.size(); ++i) {
-    for (size_t j = i+1; j < targetNTs.size(); ++j) {
-      size_t src1 = targetNTs[i].first;
-      size_t src2 = targetNTs[j].first;
-      //NT pair (src1,src2) maps to (i,j)
-      bool isMonotone = true;
-      if ((src1 < src2 && i > j) || (src1 > src2 && i < j)) isMonotone = false;
-      //NB: should throw bad_cast for Lattice input
-      const Sentence& sentence = 
-        dynamic_cast<const Sentence&>(cur_hypo.GetManager().GetSource());
-      AddNonTerminalPairFeatures(sentence,
-        cur_hypo.GetPrevHypo(src1)->GetCurrSourceRange(),
-        cur_hypo.GetPrevHypo(src2)->GetCurrSourceRange(),
-        isMonotone,
-        accumulator);
+    stringstream buf;
+    buf << "sr_h_"; //sparse reordering, Huck
+    if (m_type == SourceLeft || m_type == SourceCombined) {
+      buf << GetFactor(sourceLeftBoundaryWord,m_sourceVocab,m_sourceFactor)->GetString();
+      buf << "_";
     }
-  }*/
-
+    if (m_type == SourceRight || m_type == SourceCombined) {
+    buf << GetFactor(sourceRightBoundaryWord,m_sourceVocab,m_sourceFactor)->GetString();
+      buf << "_";
+    }
+    buf << (isMonotone ? "M" : "S");
+    accumulator->SparsePlusEquals(buf.str(), 1);
+  }
+//  cerr << endl;
   return new SparseReorderingState();
 }
 
diff --git a/moses/FF/SparseReorderingFeature.h b/moses/FF/SparseReorderingFeature.h
index 408b00b342..021d276456 100644
--- a/moses/FF/SparseReorderingFeature.h
+++ b/moses/FF/SparseReorderingFeature.h
@@ -4,6 +4,8 @@
 
 #include <boost/unordered_set.hpp>
 
+#include <util/string_piece.hh>
+
 #include "StatefulFeatureFunction.h"
 #include "FFState.h"
 
@@ -22,6 +24,12 @@ class SparseReorderingState : public FFState
 class SparseReorderingFeature : public StatefulFeatureFunction
 {
 public:
+  enum Type {
+    SourceCombined,
+    SourceLeft,
+    SourceRight
+  };
+
 	SparseReorderingFeature(const std::string &line);
 
 	bool IsUseable(const FactorMask &mask) const
@@ -58,19 +66,25 @@ class SparseReorderingFeature : public StatefulFeatureFunction
 
 private:
 
+  typedef boost::unordered_set<const Factor*> Vocab;
+
   void AddNonTerminalPairFeatures(
     const Sentence& sentence, const WordsRange& nt1, const WordsRange& nt2,
       bool isMonotone, ScoreComponentCollection* accumulator) const;
 
-  void LoadVocabulary(const std::string& filename, boost::unordered_set<std::string>& vocab);
+  void LoadVocabulary(const std::string& filename, Vocab& vocab);
+  const Factor*  GetFactor(const Word& word, const Vocab& vocab, FactorType factor) const;
 
+  Type m_type;
   FactorType m_sourceFactor;
   FactorType m_targetFactor;
   std::string m_sourceVocabFile;
   std::string m_targetVocabFile;
 
-  boost::unordered_set<std::string> m_sourceVocab;
-  boost::unordered_set<std::string> m_targetVocab;
+  const Factor* m_otherFactor;
+  
+  Vocab m_sourceVocab;
+  Vocab m_targetVocab;
 
 };
 

From 0496363db34ebd80313a226006b6083f501e67df Mon Sep 17 00:00:00 2001
From: Barry Haddow <barry.haddow@gmail.com>
Date: Fri, 13 Sep 2013 17:38:14 +0100
Subject: [PATCH 09/84] Convert to stateless

---
 moses/FF/SparseReorderingFeature.cpp |  6 ++---
 moses/FF/SparseReorderingFeature.h   | 37 +++++++---------------------
 2 files changed, 11 insertions(+), 32 deletions(-)

diff --git a/moses/FF/SparseReorderingFeature.cpp b/moses/FF/SparseReorderingFeature.cpp
index 63e7dddc90..8703a2765b 100644
--- a/moses/FF/SparseReorderingFeature.cpp
+++ b/moses/FF/SparseReorderingFeature.cpp
@@ -15,7 +15,7 @@ namespace Moses
 {
 
 SparseReorderingFeature::SparseReorderingFeature(const std::string &line)
-  :StatefulFeatureFunction("StatefulFeatureFunction",0, line),
+  :StatelessFeatureFunction("StatefulFeatureFunction",0, line),
   m_type(SourceCombined),
   m_sourceFactor(0),
   m_targetFactor(0),
@@ -81,9 +81,8 @@ const Factor* SparseReorderingFeature::GetFactor(const Word& word, const Vocab&
   return factor;
 }
 
-FFState* SparseReorderingFeature::EvaluateChart(
+void SparseReorderingFeature::EvaluateChart(
   const ChartHypothesis&  cur_hypo ,
-  int  featureID /*- used to index the state in the previous hypotheses */,
   ScoreComponentCollection* accumulator) const
 {
   // get index map for underlying hypotheses
@@ -216,7 +215,6 @@ FFState* SparseReorderingFeature::EvaluateChart(
     accumulator->SparsePlusEquals(buf.str(), 1);
   }
 //  cerr << endl;
-  return new SparseReorderingState();
 }
 
 
diff --git a/moses/FF/SparseReorderingFeature.h b/moses/FF/SparseReorderingFeature.h
index 021d276456..200200806d 100644
--- a/moses/FF/SparseReorderingFeature.h
+++ b/moses/FF/SparseReorderingFeature.h
@@ -6,22 +6,13 @@
 
 #include <util/string_piece.hh>
 
-#include "StatefulFeatureFunction.h"
+#include "StatelessFeatureFunction.h"
 #include "FFState.h"
 
 namespace Moses
 {
 
-class SparseReorderingState : public FFState
-{
-public:
-	int Compare(const FFState& other) const
-	{
-		return 0;
-	}
-};
-
-class SparseReorderingFeature : public StatefulFeatureFunction
+class SparseReorderingFeature : public StatelessFeatureFunction
 {
 public:
   enum Type {
@@ -46,23 +37,13 @@ class SparseReorderingFeature : public StatefulFeatureFunction
 	                        , const InputPath &inputPath
 	                        , ScoreComponentCollection &scoreBreakdown) const
 	{}
-	  FFState* Evaluate(
-	    const Hypothesis& cur_hypo,
-	    const FFState* prev_state,
-	    ScoreComponentCollection* accumulator) const
-	  {
-		  return new SparseReorderingState();
-	  }
-
-	  FFState* EvaluateChart(
-	    const ChartHypothesis& /* cur_hypo */,
-	    int /* featureID - used to index the state in the previous hypotheses */,
-	    ScoreComponentCollection* accumulator) const;
-
-	  virtual const FFState* EmptyHypothesisState(const InputType &input) const
-	  {
-		  return new SparseReorderingState();
-	  }
+
+  virtual void Evaluate(const Hypothesis& hypo,
+                        ScoreComponentCollection* accumulator) const
+  {}
+  void EvaluateChart(const ChartHypothesis &hypo,
+                             ScoreComponentCollection* accumulator) const;
+
 
 private:
 

From f816a138efd2a00d451609ba1afca8c33be417c9 Mon Sep 17 00:00:00 2001
From: Barry Haddow <barry.haddow@gmail.com>
Date: Fri, 13 Sep 2013 18:21:52 +0100
Subject: [PATCH 10/84] feature name

---
 moses/FF/SparseReorderingFeature.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/moses/FF/SparseReorderingFeature.cpp b/moses/FF/SparseReorderingFeature.cpp
index 8703a2765b..0203406b07 100644
--- a/moses/FF/SparseReorderingFeature.cpp
+++ b/moses/FF/SparseReorderingFeature.cpp
@@ -15,7 +15,7 @@ namespace Moses
 {
 
 SparseReorderingFeature::SparseReorderingFeature(const std::string &line)
-  :StatelessFeatureFunction("StatefulFeatureFunction",0, line),
+  :StatelessFeatureFunction("SparseReorderingFeature",0, line),
   m_type(SourceCombined),
   m_sourceFactor(0),
   m_targetFactor(0),
@@ -202,7 +202,7 @@ void SparseReorderingFeature::EvaluateChart(
       isMonotone = false;
     }
     stringstream buf;
-    buf << "sr_h_"; //sparse reordering, Huck
+    buf << "h_"; //sparse reordering, Huck
     if (m_type == SourceLeft || m_type == SourceCombined) {
       buf << GetFactor(sourceLeftBoundaryWord,m_sourceVocab,m_sourceFactor)->GetString();
       buf << "_";
@@ -212,7 +212,7 @@ void SparseReorderingFeature::EvaluateChart(
       buf << "_";
     }
     buf << (isMonotone ? "M" : "S");
-    accumulator->SparsePlusEquals(buf.str(), 1);
+    accumulator->PlusEquals(this,buf.str(), 1);
   }
 //  cerr << endl;
 }

From d737a352b373363c1bacc72bc5f20221fea90712 Mon Sep 17 00:00:00 2001
From: Barry Haddow <barry.haddow@gmail.com>
Date: Sat, 14 Sep 2013 17:22:43 +0100
Subject: [PATCH 11/84] Renamed

---
 moses/FF/Factory.cpp                               |  4 ++--
 ...eature.cpp => SparseHieroReorderingFeature.cpp} | 14 +++++++-------
 ...ingFeature.h => SparseHieroReorderingFeature.h} |  4 ++--
 3 files changed, 11 insertions(+), 11 deletions(-)
 rename moses/FF/{SparseReorderingFeature.cpp => SparseHieroReorderingFeature.cpp} (93%)
 rename moses/FF/{SparseReorderingFeature.h => SparseHieroReorderingFeature.h} (93%)

diff --git a/moses/FF/Factory.cpp b/moses/FF/Factory.cpp
index 24efde4c3e..c9a6a3f105 100644
--- a/moses/FF/Factory.cpp
+++ b/moses/FF/Factory.cpp
@@ -25,7 +25,7 @@
 #include "moses/FF/PhrasePairFeature.h"
 #include "moses/FF/PhraseLengthFeature.h"
 #include "moses/FF/DistortionScoreProducer.h"
-#include "moses/FF/SparseReorderingFeature.h"
+#include "moses/FF/SparseHieroReorderingFeature.h"
 #include "moses/FF/WordPenaltyProducer.h"
 #include "moses/FF/InputFeature.h"
 #include "moses/FF/PhrasePenalty.h"
@@ -145,7 +145,7 @@ FeatureRegistry::FeatureRegistry()
   MOSES_FNAME(ControlRecombination);
   MOSES_FNAME(SkeletonStatelessFF);
   MOSES_FNAME(SkeletonStatefulFF);
-  MOSES_FNAME(SparseReorderingFeature);
+  MOSES_FNAME(SparseHieroReorderingFeature);
   MOSES_FNAME(ExternalFeature);
 
 #ifdef HAVE_SYNLM
diff --git a/moses/FF/SparseReorderingFeature.cpp b/moses/FF/SparseHieroReorderingFeature.cpp
similarity index 93%
rename from moses/FF/SparseReorderingFeature.cpp
rename to moses/FF/SparseHieroReorderingFeature.cpp
index 0203406b07..bdb18c787e 100644
--- a/moses/FF/SparseReorderingFeature.cpp
+++ b/moses/FF/SparseHieroReorderingFeature.cpp
@@ -7,15 +7,15 @@
 
 #include "util/exception.hh"
 
-#include "SparseReorderingFeature.h"
+#include "SparseHieroReorderingFeature.h"
 
 using namespace std;
 
 namespace Moses
 {
 
-SparseReorderingFeature::SparseReorderingFeature(const std::string &line)
-  :StatelessFeatureFunction("SparseReorderingFeature",0, line),
+SparseHieroReorderingFeature::SparseHieroReorderingFeature(const std::string &line)
+  :StatelessFeatureFunction("SparseHieroReorderingFeature",0, line),
   m_type(SourceCombined),
   m_sourceFactor(0),
   m_targetFactor(0),
@@ -39,7 +39,7 @@ SparseReorderingFeature::SparseReorderingFeature(const std::string &line)
   LoadVocabulary(m_targetVocabFile, m_targetVocab);
 }
 
-void SparseReorderingFeature::SetParameter(const std::string& key, const std::string& value) {
+void SparseHieroReorderingFeature::SetParameter(const std::string& key, const std::string& value) {
   if (key == "input-factor") {
     m_sourceFactor = Scan<FactorType>(value);
   } else if (key == "output-factor") {
@@ -63,7 +63,7 @@ void SparseReorderingFeature::SetParameter(const std::string& key, const std::st
   }
 }
 
-void SparseReorderingFeature::LoadVocabulary(const std::string& filename, Vocab& vocab)
+void SparseHieroReorderingFeature::LoadVocabulary(const std::string& filename, Vocab& vocab)
 {
   if (filename.empty()) return;
   ifstream in(filename.c_str());
@@ -75,13 +75,13 @@ void SparseReorderingFeature::LoadVocabulary(const std::string& filename, Vocab&
   in.close();
 }
 
-const Factor* SparseReorderingFeature::GetFactor(const Word& word, const Vocab& vocab, FactorType factorType) const {
+const Factor* SparseHieroReorderingFeature::GetFactor(const Word& word, const Vocab& vocab, FactorType factorType) const {
   const Factor* factor = word.GetFactor(factorType);
   if (vocab.size() && vocab.find(factor) == vocab.end()) return m_otherFactor;
   return factor;
 }
 
-void SparseReorderingFeature::EvaluateChart(
+void SparseHieroReorderingFeature::EvaluateChart(
   const ChartHypothesis&  cur_hypo ,
   ScoreComponentCollection* accumulator) const
 {
diff --git a/moses/FF/SparseReorderingFeature.h b/moses/FF/SparseHieroReorderingFeature.h
similarity index 93%
rename from moses/FF/SparseReorderingFeature.h
rename to moses/FF/SparseHieroReorderingFeature.h
index 200200806d..7059b73158 100644
--- a/moses/FF/SparseReorderingFeature.h
+++ b/moses/FF/SparseHieroReorderingFeature.h
@@ -12,7 +12,7 @@
 namespace Moses
 {
 
-class SparseReorderingFeature : public StatelessFeatureFunction
+class SparseHieroReorderingFeature : public StatelessFeatureFunction
 {
 public:
   enum Type {
@@ -21,7 +21,7 @@ class SparseReorderingFeature : public StatelessFeatureFunction
     SourceRight
   };
 
-	SparseReorderingFeature(const std::string &line);
+	SparseHieroReorderingFeature(const std::string &line);
 
 	bool IsUseable(const FactorMask &mask) const
 		{ return true; }

From 266e36c4401f3235bc2c1bd292e0fc324ae3f7e4 Mon Sep 17 00:00:00 2001
From: Barry Haddow <barry.haddow@gmail.com>
Date: Wed, 18 Sep 2013 21:58:38 +0100
Subject: [PATCH 12/84] stub out unit test

---
 moses/FF/SparseHieroReorderingFeature.h       |  3 ++
 moses/FF/SparseHieroReorderingFeatureTest.cpp | 36 +++++++++++++++++++
 moses/Jamfile                                 |  4 +--
 3 files changed, 41 insertions(+), 2 deletions(-)
 create mode 100644 moses/FF/SparseHieroReorderingFeatureTest.cpp

diff --git a/moses/FF/SparseHieroReorderingFeature.h b/moses/FF/SparseHieroReorderingFeature.h
index 7059b73158..ec220af036 100644
--- a/moses/FF/SparseHieroReorderingFeature.h
+++ b/moses/FF/SparseHieroReorderingFeature.h
@@ -6,6 +6,9 @@
 
 #include <util/string_piece.hh>
 
+#include "moses/Factor.h"
+#include "moses/Sentence.h"
+
 #include "StatelessFeatureFunction.h"
 #include "FFState.h"
 
diff --git a/moses/FF/SparseHieroReorderingFeatureTest.cpp b/moses/FF/SparseHieroReorderingFeatureTest.cpp
new file mode 100644
index 0000000000..f05355df91
--- /dev/null
+++ b/moses/FF/SparseHieroReorderingFeatureTest.cpp
@@ -0,0 +1,36 @@
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2013- University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+#include <iostream>
+
+#include <boost/test/unit_test.hpp>
+
+#include "SparseHieroReorderingFeature.h"
+
+using namespace Moses;
+using namespace std;
+
+BOOST_AUTO_TEST_SUITE(shrf)
+
+BOOST_AUTO_TEST_CASE(lexical_rule)
+{
+  SparseHieroReorderingFeature feature("name=shrf");
+
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/moses/Jamfile b/moses/Jamfile
index 26a98c4c98..b344415858 100644
--- a/moses/Jamfile
+++ b/moses/Jamfile
@@ -64,7 +64,7 @@ lib moses :
 : #exceptions
   ThreadPool.cpp
   SyntacticLanguageModel.cpp
-  *Test.cpp Mock*.cpp
+  *Test.cpp Mock*.cpp FF/*Test.cpp
   FF/Factory.cpp
 ]
 headers FF_Factory.o LM//LM TranslationModel/CompactPT//CompactPT synlm ThreadPool rt
@@ -74,5 +74,5 @@ alias headers-to-install : [ glob-tree *.h ] ;
 
 import testing ;
 
-unit-test moses_test : [ glob *Test.cpp Mock*.cpp ] moses headers ..//z ../OnDiskPt//OnDiskPt ..//boost_unit_test_framework ;
+unit-test moses_test : [ glob *Test.cpp Mock*.cpp FF/*Test.cpp ] moses headers ..//z ../OnDiskPt//OnDiskPt ..//boost_unit_test_framework ;
 

From 1c00f8d9a7b3302e06d64b33d981d908900a8e2e Mon Sep 17 00:00:00 2001
From: Barry Haddow <barry.haddow@gmail.com>
Date: Mon, 26 May 2014 13:47:32 +0100
Subject: [PATCH 13/84] Should be with other LR classes

---
 moses/{ => FF/LexicalReordering}/ReorderingStack.cpp | 0
 moses/{ => FF/LexicalReordering}/ReorderingStack.h   | 2 +-
 2 files changed, 1 insertion(+), 1 deletion(-)
 rename moses/{ => FF/LexicalReordering}/ReorderingStack.cpp (100%)
 rename moses/{ => FF/LexicalReordering}/ReorderingStack.h (94%)

diff --git a/moses/ReorderingStack.cpp b/moses/FF/LexicalReordering/ReorderingStack.cpp
similarity index 100%
rename from moses/ReorderingStack.cpp
rename to moses/FF/LexicalReordering/ReorderingStack.cpp
diff --git a/moses/ReorderingStack.h b/moses/FF/LexicalReordering/ReorderingStack.h
similarity index 94%
rename from moses/ReorderingStack.h
rename to moses/FF/LexicalReordering/ReorderingStack.h
index 730b17ce31..5a5b80d160 100644
--- a/moses/ReorderingStack.h
+++ b/moses/FF/LexicalReordering/ReorderingStack.h
@@ -12,7 +12,7 @@
 //#include "Phrase.h"
 //#include "TypeDef.h"
 //#include "Util.h"
-#include "WordsRange.h"
+#include "moses/WordsRange.h"
 
 namespace Moses
 {

From 1bd851411f24dd0903a1714259d7ce0cebd345df Mon Sep 17 00:00:00 2001
From: Barry Haddow <barry.haddow@gmail.com>
Date: Tue, 27 May 2014 09:52:18 +0100
Subject: [PATCH 14/84] fix includes

---
 moses/FF/LexicalReordering/LexicalReorderingState.cpp | 2 +-
 moses/FF/LexicalReordering/LexicalReorderingState.h   | 5 ++---
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/moses/FF/LexicalReordering/LexicalReorderingState.cpp b/moses/FF/LexicalReordering/LexicalReorderingState.cpp
index aa29a4a12c..c13c3ee64d 100644
--- a/moses/FF/LexicalReordering/LexicalReorderingState.cpp
+++ b/moses/FF/LexicalReordering/LexicalReorderingState.cpp
@@ -5,11 +5,11 @@
 #include "moses/FF/FFState.h"
 #include "moses/Hypothesis.h"
 #include "moses/WordsRange.h"
-#include "moses/ReorderingStack.h"
 #include "moses/TranslationOption.h"
 
 #include "LexicalReordering.h"
 #include "LexicalReorderingState.h"
+#include "ReorderingStack.h"
 
 namespace Moses
 {
diff --git a/moses/FF/LexicalReordering/LexicalReorderingState.h b/moses/FF/LexicalReordering/LexicalReorderingState.h
index 8e237adc1a..a581ae2161 100644
--- a/moses/FF/LexicalReordering/LexicalReorderingState.h
+++ b/moses/FF/LexicalReordering/LexicalReorderingState.h
@@ -8,10 +8,10 @@
 #include "LexicalReordering.h"
 #include "moses/WordsRange.h"
 #include "moses/WordsBitmap.h"
-#include "moses/ReorderingStack.h"
 #include "moses/TranslationOption.h"
 #include "moses/FF/FFState.h"
 
+#include "ReorderingStack.h"
 
 namespace Moses
 {
@@ -19,7 +19,6 @@ class LexicalReorderingState;
 class LexicalReordering;
 
 /** Factory class for lexical reordering states
- *  @todo There's a lot of classes for lexicalized reordering. Perhaps put them in a separate dir
  */
 class LexicalReorderingConfiguration
 {
@@ -99,7 +98,7 @@ class LexicalReorderingState : public FFState
   // The following is the true direction of the object, which can be Backward or Forward even if the Configuration has Bidirectional.
   LexicalReorderingConfiguration::Direction m_direction;
   size_t m_offset;
-  const Scores *m_prevScore;
+  const TranslationOption *m_prevOption;
 
   inline LexicalReorderingState(const LexicalReorderingState *prev, const TranslationOption &topt) :
     m_configuration(prev->m_configuration), m_direction(prev->m_direction), m_offset(prev->m_offset),

From 244d9cd824390c1209b6c91eb59e9af339f83325 Mon Sep 17 00:00:00 2001
From: Barry Haddow <barry.haddow@gmail.com>
Date: Tue, 27 May 2014 11:05:56 +0100
Subject: [PATCH 15/84] Stub out sparse reordering class

---
 .../LexicalReordering/LexicalReordering.cpp   |  7 ++++
 .../FF/LexicalReordering/LexicalReordering.h  |  8 +++--
 .../LexicalReorderingState.cpp                | 24 +++++++------
 .../LexicalReorderingState.h                  | 12 ++++---
 .../FF/LexicalReordering/SparseReordering.cpp | 25 ++++++++++++++
 moses/FF/LexicalReordering/SparseReordering.h | 34 +++++++++++++++++++
 6 files changed, 92 insertions(+), 18 deletions(-)
 create mode 100644 moses/FF/LexicalReordering/SparseReordering.cpp
 create mode 100644 moses/FF/LexicalReordering/SparseReordering.h

diff --git a/moses/FF/LexicalReordering/LexicalReordering.cpp b/moses/FF/LexicalReordering/LexicalReordering.cpp
index 6a2a488d91..10b68913ca 100644
--- a/moses/FF/LexicalReordering/LexicalReordering.cpp
+++ b/moses/FF/LexicalReordering/LexicalReordering.cpp
@@ -14,6 +14,7 @@ LexicalReordering::LexicalReordering(const std::string &line)
 {
   std::cerr << "Initializing LexicalReordering.." << std::endl;
 
+  map<string,string> sparseArgs;
   for (size_t i = 0; i < m_args.size(); ++i) {
     const vector<string> &args = m_args[i];
 
@@ -27,6 +28,8 @@ LexicalReordering::LexicalReordering(const std::string &line)
       m_factorsE =Tokenize<FactorType>(args[1]);
     } else if (args[0] == "path") {
       m_filePath = args[1];
+    } else if (args[0].substr(0,7) == "sparse-") {
+      sparseArgs[args[0].substr(7)] = args[1];
     } else {
       throw "Unknown argument " + args[0];
     }
@@ -48,6 +51,10 @@ LexicalReordering::LexicalReordering(const std::string &line)
   default:
     throw "Unknown conditioning option!";
   }
+
+  if (sparseArgs.size()) {
+    m_sparse.reset(new SparseReordering(sparseArgs));
+  }
 }
 
 LexicalReordering::~LexicalReordering()
diff --git a/moses/FF/LexicalReordering/LexicalReordering.h b/moses/FF/LexicalReordering/LexicalReordering.h
index 4ff0057f09..39d11a582d 100644
--- a/moses/FF/LexicalReordering/LexicalReordering.h
+++ b/moses/FF/LexicalReordering/LexicalReordering.h
@@ -3,17 +3,20 @@
 
 #include <string>
 #include <vector>
+#include <boost/scoped_ptr.hpp>
 #include "moses/Factor.h"
 #include "moses/Phrase.h"
 #include "moses/TypeDef.h"
 #include "moses/Util.h"
 #include "moses/WordsRange.h"
 
-#include "LexicalReorderingState.h"
-#include "LexicalReorderingTable.h"
 #include "moses/FF/StatefulFeatureFunction.h"
 #include "util/exception.hh"
 
+#include "LexicalReorderingState.h"
+#include "LexicalReorderingTable.h"
+#include "SparseReordering.h"
+
 
 namespace Moses
 {
@@ -79,6 +82,7 @@ class LexicalReordering : public StatefulFeatureFunction
   //bool m_oneScorePerDirection;
   std::vector<FactorType> m_factorsE, m_factorsF;
   std::string m_filePath;
+  boost::scoped_ptr<SparseReordering> m_sparse;
 };
 
 }
diff --git a/moses/FF/LexicalReordering/LexicalReorderingState.cpp b/moses/FF/LexicalReordering/LexicalReorderingState.cpp
index c13c3ee64d..cef5c8cbb0 100644
--- a/moses/FF/LexicalReordering/LexicalReorderingState.cpp
+++ b/moses/FF/LexicalReordering/LexicalReorderingState.cpp
@@ -128,7 +128,8 @@ void LexicalReorderingState::CopyScores(Scores& scores, const TranslationOption
   UTIL_THROW_IF2(m_direction != LexicalReorderingConfiguration::Backward && m_direction != LexicalReorderingConfiguration::Forward,
 		  "Unknown direction: " << m_direction);
   const Scores *cachedScores = (m_direction == LexicalReorderingConfiguration::Backward) ?
-                               topt.GetLexReorderingScores(m_configuration.GetScoreProducer()) : m_prevScore;
+                               topt.GetLexReorderingScores(m_configuration.GetScoreProducer()) :
+                               (m_prevOption ? m_prevOption->GetLexReorderingScores(m_configuration.GetScoreProducer()) : NULL); // m_prevOption starts out NULL; then use the no-scores path below
 
   // No scores available. TODO: Using a good prior distribution would be nicer.
   if(cachedScores == NULL)
@@ -151,23 +152,24 @@ void LexicalReorderingState::ClearScores(Scores& scores) const
     std::fill(scores.begin() + m_offset, scores.begin() + m_offset + m_configuration.GetNumberOfTypes(), 0);
 }
 
-int LexicalReorderingState::ComparePrevScores(const Scores *other) const
+int LexicalReorderingState::ComparePrevScores(const TranslationOption *other) const
 {
-  if(m_prevScore == other)
+  // m_prevOption (ours or the other state's) starts out NULL, so check before dereferencing.
+  const Scores* myPrevScores = m_prevOption ? m_prevOption->GetLexReorderingScores(m_configuration.GetScoreProducer()) : NULL;
+  const Scores* otherPrevScores = other ? other->GetLexReorderingScores(m_configuration.GetScoreProducer()) : NULL;
+  if(myPrevScores == otherPrevScores)
     return 0;
 
   // The pointers are NULL if a phrase pair isn't found in the reordering table.
-  if(other == NULL)
+  if(otherPrevScores == NULL)
     return -1;
-  if(m_prevScore == NULL)
+  if(myPrevScores == NULL)
     return 1;
 
-  const Scores &my = *m_prevScore;
-  const Scores &their = *other;
   for(size_t i = m_offset; i < m_offset + m_configuration.GetNumberOfTypes(); i++)
-    if(my[i] < their[i])
+    if((*myPrevScores)[i] < (*otherPrevScores)[i])
       return -1;
-    else if(my[i] > their[i])
+    else if((*myPrevScores)[i] > (*otherPrevScores)[i])
       return 1;
 
   return 0;
@@ -193,7 +195,7 @@ int PhraseBasedReorderingState::Compare(const FFState& o) const
   UTIL_THROW_IF2(other == NULL, "Wrong state type");
   if (m_prevRange == other->m_prevRange) {
     if (m_direction == LexicalReorderingConfiguration::Forward) {
-      return ComparePrevScores(other->m_prevScore);
+      return ComparePrevScores(other->m_prevOption);
     } else {
       return 0;
     }
@@ -411,7 +413,7 @@ int HierarchicalReorderingForwardState::Compare(const FFState& o) const
   UTIL_THROW_IF2(other == NULL, "Wrong state type");
 
   if (m_prevRange == other->m_prevRange) {
-    return ComparePrevScores(other->m_prevScore);
+    return ComparePrevScores(other->m_prevOption);
   } else if (m_prevRange < other->m_prevRange) {
     return -1;
   }
diff --git a/moses/FF/LexicalReordering/LexicalReorderingState.h b/moses/FF/LexicalReordering/LexicalReorderingState.h
index a581ae2161..14e3b5189f 100644
--- a/moses/FF/LexicalReordering/LexicalReorderingState.h
+++ b/moses/FF/LexicalReordering/LexicalReorderingState.h
@@ -4,8 +4,9 @@
 #include <vector>
 #include <string>
 
+
 #include "moses/Hypothesis.h"
-#include "LexicalReordering.h"
+//#include "LexicalReordering.h"
 #include "moses/WordsRange.h"
 #include "moses/WordsBitmap.h"
 #include "moses/TranslationOption.h"
@@ -89,28 +90,29 @@ class LexicalReorderingState : public FFState
 
   static LexicalReorderingState* CreateLexicalReorderingState(const std::vector<std::string>& config,
       LexicalReorderingConfiguration::Direction dir, const InputType &input);
+  typedef int ReorderingType;
 
 protected:
-  typedef int ReorderingType;
 
 
   const LexicalReorderingConfiguration &m_configuration;
   // The following is the true direction of the object, which can be Backward or Forward even if the Configuration has Bidirectional.
   LexicalReorderingConfiguration::Direction m_direction;
   size_t m_offset;
+  //forward scores are conditioned on prev option, so need to remember it
   const TranslationOption *m_prevOption;
 
   inline LexicalReorderingState(const LexicalReorderingState *prev, const TranslationOption &topt) :
     m_configuration(prev->m_configuration), m_direction(prev->m_direction), m_offset(prev->m_offset),
-    m_prevScore(topt.GetLexReorderingScores(m_configuration.GetScoreProducer())) {}
+    m_prevOption(&topt) {}
 
   inline LexicalReorderingState(const LexicalReorderingConfiguration &config, LexicalReorderingConfiguration::Direction dir, size_t offset)
-    : m_configuration(config), m_direction(dir), m_offset(offset), m_prevScore(NULL) {}
+    : m_configuration(config), m_direction(dir), m_offset(offset), m_prevOption(NULL) {}
 
   // copy the right scores in the right places, taking into account forward/backward, offset, collapse
   void CopyScores(Scores& scores, const TranslationOption& topt, ReorderingType reoType) const;
   void ClearScores(Scores& scores) const;
-  int ComparePrevScores(const Scores *other) const;
+  int ComparePrevScores(const TranslationOption *other) const;
 
   //constants for the different type of reorderings (corresponding to indexes in the table file)
   static const ReorderingType M = 0;  // monotonic
diff --git a/moses/FF/LexicalReordering/SparseReordering.cpp b/moses/FF/LexicalReordering/SparseReordering.cpp
new file mode 100644
index 0000000000..6caf1bef23
--- /dev/null
+++ b/moses/FF/LexicalReordering/SparseReordering.cpp
@@ -0,0 +1,25 @@
+#include "SparseReordering.h"
+
+using namespace std;
+
+namespace Moses 
+{
+
+SparseReordering::SparseReordering(const map<string,string>& config) 
+{
+  for (map<string,string>::const_iterator i = config.begin(); i != config.end(); ++i) {
+    cerr << i->first << " " << i->second << endl;
+  }
+}
+
+
+void SparseReordering::AddScores(
+              const TranslationOption& topt,
+               LexicalReorderingState::ReorderingType reoType,
+               LexicalReorderingConfiguration::Direction direction,
+               ScoreComponentCollection* scores) const 
+{
+}
+
+} //namespace
+
diff --git a/moses/FF/LexicalReordering/SparseReordering.h b/moses/FF/LexicalReordering/SparseReordering.h
new file mode 100644
index 0000000000..0f30554a22
--- /dev/null
+++ b/moses/FF/LexicalReordering/SparseReordering.h
@@ -0,0 +1,34 @@
+#ifndef moses_FF_LexicalReordering_SparseReordering_h
+#define moses_FF_LexicalReordering_SparseReordering_h
+
+/**
+ * Sparse reordering features for phrase-based MT, following Cherry (NAACL, 2013)
+**/
+
+
+#include <map>
+#include <string>
+
+#include "moses/ScoreComponentCollection.h"
+#include "LexicalReorderingState.h"
+
+namespace Moses
+{
+class SparseReordering
+{
+public:
+  SparseReordering(const std::map<std::string,std::string>& config);
+  
+  void AddScores(const TranslationOption& topt,
+                 LexicalReorderingState::ReorderingType reoType,
+                 LexicalReorderingConfiguration::Direction direction,
+                 ScoreComponentCollection* scores) const ;
+
+};
+
+
+
+} //namespace
+
+
+#endif

From 912c9c1f554a6671f73d02744421966478a1cbd2 Mon Sep 17 00:00:00 2001
From: Barry Haddow <barry.haddow@gmail.com>
Date: Tue, 27 May 2014 13:36:58 +0100
Subject: [PATCH 16/84] Configuration

---
 moses/FF/LexicalReordering/SparseReordering.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/moses/FF/LexicalReordering/SparseReordering.h b/moses/FF/LexicalReordering/SparseReordering.h
index 0f30554a22..226f765449 100644
--- a/moses/FF/LexicalReordering/SparseReordering.h
+++ b/moses/FF/LexicalReordering/SparseReordering.h
@@ -8,10 +8,25 @@
 
 #include <map>
 #include <string>
+#include <vector>
 
 #include "moses/ScoreComponentCollection.h"
 #include "LexicalReorderingState.h"
 
+/**
+ Configuration of sparse reordering:
+  
+  The sparse reordering feature is configured using sparse-* configs in the lexical reordering line.
+  sparse-words-<id>=<filename>  -- Features which fire for the words in the list
+  sparse-clusters-<id>=<filename> -- Features which fire for clusters in the list. Format
+                                     of cluster file TBD
+  sparse-phrase                    -- Add features which depend on the current phrase
+  sparse-stack                     -- Add features which depend on the previous phrase, or
+                                      top of stack.
+  sparse-between                   -- Add features which depend on words between previous phrase
+                                      (or top of stack) and current phrase.
+**/
+
 namespace Moses
 {
 class SparseReordering
@@ -24,6 +39,9 @@ class SparseReordering
                  LexicalReorderingConfiguration::Direction direction,
                  ScoreComponentCollection* scores) const ;
 
+private:
+
+
 };
 
 

From 10e26ef00d308d7abf1cd1ad7a76bd33eb3a0c43 Mon Sep 17 00:00:00 2001
From: Barry Haddow <barry.haddow@gmail.com>
Date: Fri, 30 May 2014 11:27:59 +0100
Subject: [PATCH 17/84] config of sparse reordering

---
 .../FF/LexicalReordering/SparseReordering.cpp | 39 ++++++++++++++++++-
 moses/FF/LexicalReordering/SparseReordering.h | 13 +++++--
 2 files changed, 48 insertions(+), 4 deletions(-)

diff --git a/moses/FF/LexicalReordering/SparseReordering.cpp b/moses/FF/LexicalReordering/SparseReordering.cpp
index 6caf1bef23..0405ff29ed 100644
--- a/moses/FF/LexicalReordering/SparseReordering.cpp
+++ b/moses/FF/LexicalReordering/SparseReordering.cpp
@@ -1,5 +1,11 @@
+#include <fstream>
+
+#include "moses/Util.h"
+#include "util/exception.hh"
+
 #include "SparseReordering.h"
 
+
 using namespace std;
 
 namespace Moses 
@@ -7,11 +13,52 @@ namespace Moses
 
 SparseReordering::SparseReordering(const map<string,string>& config) 
 {
+  static const string kSource= "source";
+  static const string kTarget = "target";
+  // Build the sparse reordering configuration from the option map: "words"
+  // entries load a source/target word list, "phrase"/"stack"/"between" switch
+  // on a feature group, and anything else (including "clusters") is rejected.
+  // The flags are only ever set to true below, so start them at a defined
+  // false; otherwise an absent option leaves its flag indeterminate.
+  m_usePhrase = false;
+  m_useStack = false;
+  m_useBetween = false;
   for (map<string,string>::const_iterator i = config.begin(); i != config.end(); ++i) {
-    cerr << i->first << " " << i->second << endl;
+    vector<string> fields = Tokenize(i->first, "-");
+    // No fields means there is no fields[0] to dispatch on; fail like any other unparseable name.
+    UTIL_THROW_IF(fields.empty(), util::Exception, "Unable to parse sparse reordering option: " << i->first);
+    if (fields[0] == "words") {
+      UTIL_THROW_IF(!(fields.size() == 3), util::Exception, "Sparse reordering word list name should be sparse-words-(source|target)-<id>");
+      if (fields[1] == kSource) {
+        ReadWordList(i->second,fields[2],&m_sourceWordLists);
+      } else if (fields[1] == kTarget) {
+        ReadWordList(i->second,fields[2],&m_targetWordLists);
+      } else {
+        UTIL_THROW(util::Exception, "Sparse reordering requires source or target, not " << fields[1]);
+      }
+    } else if (fields[0] == "clusters") {
+      UTIL_THROW(util::Exception, "Sparse reordering does not yet support clusters" << i->first);
+    } else if (fields[0] == "phrase") {
+      m_usePhrase = true;
+    } else if (fields[0] == "stack") {
+      m_useStack = true;
+    } else if (fields[0] == "between") {
+      m_useBetween = true;
+    } else {
+      UTIL_THROW(util::Exception, "Unable to parse sparse reordering option: " << i->first);
+    }
   }
 }
 
+void SparseReordering::ReadWordList(const string& filename, const string& id, vector<WordList>* pWordLists) {
+  ifstream fh(filename.c_str());
+  string line;
+  pWordLists->push_back(WordList());
+  pWordLists->back().first = id;
+  while (getline(fh,line)) {
+    pWordLists->back().second.insert(line);
+  }
+}
 
 void SparseReordering::AddScores(
               const TranslationOption& topt,
diff --git a/moses/FF/LexicalReordering/SparseReordering.h b/moses/FF/LexicalReordering/SparseReordering.h
index 226f765449..a6db663e0f 100644
--- a/moses/FF/LexicalReordering/SparseReordering.h
+++ b/moses/FF/LexicalReordering/SparseReordering.h
@@ -17,8 +17,8 @@
  Configuration of sparse reordering:
   
   The sparse reordering feature is configured using sparse-* configs in the lexical reordering line.
-  sparse-words-<id>=<filename>  -- Features which fire for the words in the list
-  sparse-clusters-<id>=<filename> -- Features which fire for clusters in the list. Format
+  sparse-words-(source|target)-<id>=<filename>  -- Features which fire for the words in the list
+  sparse-clusters-(source|target)-<id>=<filename> -- Features which fire for clusters in the list. Format
                                      of cluster file TBD
   sparse-phrase                    -- Add features which depend on the current phrase
   sparse-stack                     -- Add features which depend on the previous phrase, or
@@ -40,7 +40,14 @@ class SparseReordering
                  ScoreComponentCollection* scores) const ;
 
 private:
-
+  typedef std::pair<std::string, std::set<std::string> > WordList; //id and list
+  std::vector<WordList> m_sourceWordLists;
+  std::vector<WordList> m_targetWordLists;
+  bool m_usePhrase;
+  bool m_useBetween;
+  bool m_useStack;
+
+  void ReadWordList(const std::string& filename, const std::string& id, std::vector<WordList>* pWordLists);
 
 };
 

From 18d1bceea099a17009b93605d41ebfea448727a0 Mon Sep 17 00:00:00 2001
From: Barry Haddow <barry.haddow@gmail.com>
Date: Fri, 6 Jun 2014 14:20:23 +0100
Subject: [PATCH 18/84] Move sparse reordering object to LR config

---
 moses/FF/LexicalReordering/LexicalReordering.cpp      | 10 +++-------
 moses/FF/LexicalReordering/LexicalReordering.h        |  5 ++---
 moses/FF/LexicalReordering/LexicalReorderingState.cpp |  7 +++++++
 moses/FF/LexicalReordering/LexicalReorderingState.h   |  5 +++++
 4 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/moses/FF/LexicalReordering/LexicalReordering.cpp b/moses/FF/LexicalReordering/LexicalReordering.cpp
index 10b68913ca..c5daee95bb 100644
--- a/moses/FF/LexicalReordering/LexicalReordering.cpp
+++ b/moses/FF/LexicalReordering/LexicalReordering.cpp
@@ -19,7 +19,7 @@ LexicalReordering::LexicalReordering(const std::string &line)
     const vector<string> &args = m_args[i];
 
     if (args[0] == "type") {
-      m_configuration = new LexicalReorderingConfiguration(args[1]);
+      m_configuration.reset(new LexicalReorderingConfiguration(args[1]));
       m_configuration->SetScoreProducer(this);
       m_modelTypeString = m_configuration->GetModelString();
     } else if (args[0] == "input-factor") {
@@ -52,20 +52,16 @@ LexicalReordering::LexicalReordering(const std::string &line)
     throw "Unknown conditioning option!";
   }
 
-  if (sparseArgs.size()) {
-    m_sparse.reset(new SparseReordering(sparseArgs));
-  }
+  m_configuration->ConfigureSparse(sparseArgs);
 }
 
 LexicalReordering::~LexicalReordering()
 {
-  delete m_table;
-  delete m_configuration;
 }
 
 void LexicalReordering::Load()
 {
-  m_table = LexicalReorderingTable::LoadAvailable(m_filePath, m_factorsF, m_factorsE, std::vector<FactorType>());
+  m_table.reset(LexicalReorderingTable::LoadAvailable(m_filePath, m_factorsF, m_factorsE, std::vector<FactorType>()));
 }
 
 Scores LexicalReordering::GetProb(const Phrase& f, const Phrase& e) const
diff --git a/moses/FF/LexicalReordering/LexicalReordering.h b/moses/FF/LexicalReordering/LexicalReordering.h
index 39d11a582d..6255987a4f 100644
--- a/moses/FF/LexicalReordering/LexicalReordering.h
+++ b/moses/FF/LexicalReordering/LexicalReordering.h
@@ -72,17 +72,16 @@ class LexicalReordering : public StatefulFeatureFunction
   bool DecodeDirection(std::string s);
   bool DecodeNumFeatureFunctions(std::string s);
 
-  LexicalReorderingConfiguration *m_configuration;
+  boost::scoped_ptr<LexicalReorderingConfiguration> m_configuration;
   std::string m_modelTypeString;
   std::vector<std::string> m_modelType;
-  LexicalReorderingTable* m_table;
+  boost::scoped_ptr<LexicalReorderingTable> m_table;
   //std::vector<Direction> m_direction;
   std::vector<LexicalReorderingConfiguration::Condition> m_condition;
   //std::vector<size_t> m_scoreOffset;
   //bool m_oneScorePerDirection;
   std::vector<FactorType> m_factorsE, m_factorsF;
   std::string m_filePath;
-  boost::scoped_ptr<SparseReordering> m_sparse;
 };
 
 }
diff --git a/moses/FF/LexicalReordering/LexicalReorderingState.cpp b/moses/FF/LexicalReordering/LexicalReorderingState.cpp
index cef5c8cbb0..d334749431 100644
--- a/moses/FF/LexicalReordering/LexicalReorderingState.cpp
+++ b/moses/FF/LexicalReordering/LexicalReorderingState.cpp
@@ -38,6 +38,13 @@ size_t LexicalReorderingConfiguration::GetNumScoreComponents() const
   }
 }
 
+void LexicalReorderingConfiguration::ConfigureSparse(const std::map<std::string,std::string>& sparseArgs) 
+{
+  if (sparseArgs.size()) {
+    m_sparse.reset(new SparseReordering(sparseArgs));
+  }
+}
+
 void LexicalReorderingConfiguration::SetAdditionalScoreComponents(size_t number)
 {
   m_additionalScoreComponents = number;
diff --git a/moses/FF/LexicalReordering/LexicalReorderingState.h b/moses/FF/LexicalReordering/LexicalReorderingState.h
index 14e3b5189f..5c179c39cf 100644
--- a/moses/FF/LexicalReordering/LexicalReorderingState.h
+++ b/moses/FF/LexicalReordering/LexicalReorderingState.h
@@ -4,6 +4,7 @@
 #include <vector>
 #include <string>
 
+#include <boost/scoped_ptr.hpp>
 
 #include "moses/Hypothesis.h"
 //#include "LexicalReordering.h"
@@ -18,6 +19,7 @@ namespace Moses
 {
 class LexicalReorderingState;
 class LexicalReordering;
+class SparseReordering;
 
 /** Factory class for lexical reordering states
  */
@@ -31,6 +33,8 @@ class LexicalReorderingConfiguration
 
   LexicalReorderingConfiguration(const std::string &modelType);
 
+  void ConfigureSparse(const std::map<std::string,std::string>& sparseArgs);
+
   LexicalReorderingState *CreateLexicalReorderingState(const InputType &input) const;
 
   size_t GetNumScoreComponents() const;
@@ -79,6 +83,7 @@ class LexicalReorderingConfiguration
   Direction m_direction;
   Condition m_condition;
   size_t m_additionalScoreComponents;
+  boost::scoped_ptr<SparseReordering> m_sparse;
 };
 
 //! Abstract class for lexical reordering model states

From 4aa4fe0a046921f3850534847e239bd8879b54ad Mon Sep 17 00:00:00 2001
From: Barry Haddow <barry.haddow@gmail.com>
Date: Fri, 6 Jun 2014 20:25:45 +0100
Subject: [PATCH 19/84] Pass scc, not scores

---
 .../LexicalReordering/LexicalReordering.cpp   |  5 +-
 .../LexicalReorderingState.cpp                | 46 ++++++++-----------
 .../LexicalReorderingState.h                  | 14 +++---
 3 files changed, 27 insertions(+), 38 deletions(-)

diff --git a/moses/FF/LexicalReordering/LexicalReordering.cpp b/moses/FF/LexicalReordering/LexicalReordering.cpp
index c5daee95bb..6c73863600 100644
--- a/moses/FF/LexicalReordering/LexicalReordering.cpp
+++ b/moses/FF/LexicalReordering/LexicalReordering.cpp
@@ -73,11 +73,8 @@ FFState* LexicalReordering::Evaluate(const Hypothesis& hypo,
                                      const FFState* prev_state,
                                      ScoreComponentCollection* out) const
 {
-  Scores score(GetNumScoreComponents(), 0);
   const LexicalReorderingState *prev = dynamic_cast<const LexicalReorderingState *>(prev_state);
-  LexicalReorderingState *next_state = prev->Expand(hypo.GetTranslationOption(), score);
-
-  out->PlusEquals(this, score);
+  LexicalReorderingState *next_state = prev->Expand(hypo.GetTranslationOption(), out);
 
   return next_state;
 }
diff --git a/moses/FF/LexicalReordering/LexicalReorderingState.cpp b/moses/FF/LexicalReordering/LexicalReorderingState.cpp
index d334749431..c6782974ba 100644
--- a/moses/FF/LexicalReordering/LexicalReorderingState.cpp
+++ b/moses/FF/LexicalReordering/LexicalReorderingState.cpp
@@ -129,7 +129,7 @@ LexicalReorderingState *LexicalReorderingConfiguration::CreateLexicalReorderingS
   return new BidirectionalReorderingState(*this, bwd, fwd, 0);
 }
 
-void LexicalReorderingState::CopyScores(Scores& scores, const TranslationOption &topt, ReorderingType reoType) const
+void LexicalReorderingState::CopyScores(ScoreComponentCollection*  accum, const TranslationOption &topt, ReorderingType reoType) const
 {
   // don't call this on a bidirectional object
   UTIL_THROW_IF2(m_direction != LexicalReorderingConfiguration::Backward && m_direction != LexicalReorderingConfiguration::Forward,
@@ -142,6 +142,8 @@ void LexicalReorderingState::CopyScores(Scores& scores, const TranslationOption
   if(cachedScores == NULL)
     return;
 
+  Scores scores(m_configuration.GetScoreProducer()->GetNumScoreComponents(),0);
+
   const Scores &scoreSet = *cachedScores;
   if(m_configuration.CollapseScores())
     scores[m_offset] = scoreSet[m_offset + reoType];
@@ -149,15 +151,9 @@ void LexicalReorderingState::CopyScores(Scores& scores, const TranslationOption
     std::fill(scores.begin() + m_offset, scores.begin() + m_offset + m_configuration.GetNumberOfTypes(), 0);
     scores[m_offset + reoType] = scoreSet[m_offset + reoType];
   }
+  accum->PlusEquals(m_configuration.GetScoreProducer(), scores);
 }
 
-void LexicalReorderingState::ClearScores(Scores& scores) const
-{
-  if(m_configuration.CollapseScores())
-    scores[m_offset] = 0;
-  else
-    std::fill(scores.begin() + m_offset, scores.begin() + m_offset + m_configuration.GetNumberOfTypes(), 0);
-}
 
 int LexicalReorderingState::ComparePrevScores(const TranslationOption *other) const
 {
@@ -212,27 +208,23 @@ int PhraseBasedReorderingState::Compare(const FFState& o) const
   return 1;
 }
 
-LexicalReorderingState* PhraseBasedReorderingState::Expand(const TranslationOption& topt, Scores& scores) const
+LexicalReorderingState* PhraseBasedReorderingState::Expand(const TranslationOption& topt, ScoreComponentCollection* scores) const
 {
   ReorderingType reoType;
   const WordsRange currWordsRange = topt.GetSourceWordsRange();
   const LexicalReorderingConfiguration::ModelType modelType = m_configuration.GetModelType();
 
-  if (m_direction == LexicalReorderingConfiguration::Forward && m_first) {
-    ClearScores(scores);
-  } else {
-    if (!m_first || m_useFirstBackwardScore) {
-      if (modelType == LexicalReorderingConfiguration::MSD) {
-        reoType = GetOrientationTypeMSD(currWordsRange);
-      } else if (modelType == LexicalReorderingConfiguration::MSLR) {
-        reoType = GetOrientationTypeMSLR(currWordsRange);
-      } else if (modelType == LexicalReorderingConfiguration::Monotonic) {
-        reoType = GetOrientationTypeMonotonic(currWordsRange);
-      } else {
-        reoType = GetOrientationTypeLeftRight(currWordsRange);
-      }
-      CopyScores(scores, topt, reoType);
+  if ((m_direction != LexicalReorderingConfiguration::Forward && m_useFirstBackwardScore)  || !m_first) {
+    if (modelType == LexicalReorderingConfiguration::MSD) {
+      reoType = GetOrientationTypeMSD(currWordsRange);
+    } else if (modelType == LexicalReorderingConfiguration::MSLR) {
+      reoType = GetOrientationTypeMSLR(currWordsRange);
+    } else if (modelType == LexicalReorderingConfiguration::Monotonic) {
+      reoType = GetOrientationTypeMonotonic(currWordsRange);
+    } else {
+      reoType = GetOrientationTypeLeftRight(currWordsRange);
     }
+    CopyScores(scores, topt, reoType);
   }
 
   return new PhraseBasedReorderingState(this, topt);
@@ -310,7 +302,7 @@ int BidirectionalReorderingState::Compare(const FFState& o) const
     return m_forward->Compare(*other.m_forward);
 }
 
-LexicalReorderingState* BidirectionalReorderingState::Expand(const TranslationOption& topt, Scores& scores) const
+LexicalReorderingState* BidirectionalReorderingState::Expand(const TranslationOption& topt,  ScoreComponentCollection* scores) const
 {
   LexicalReorderingState *newbwd = m_backward->Expand(topt, scores);
   LexicalReorderingState *newfwd = m_forward->Expand(topt, scores);
@@ -334,7 +326,7 @@ int HierarchicalReorderingBackwardState::Compare(const FFState& o) const
   return m_reoStack.Compare(other.m_reoStack);
 }
 
-LexicalReorderingState* HierarchicalReorderingBackwardState::Expand(const TranslationOption& topt, Scores& scores) const
+LexicalReorderingState* HierarchicalReorderingBackwardState::Expand(const TranslationOption& topt, ScoreComponentCollection*  scores) const
 {
 
   HierarchicalReorderingBackwardState* nextState = new HierarchicalReorderingBackwardState(this, topt, m_reoStack);
@@ -438,7 +430,7 @@ int HierarchicalReorderingForwardState::Compare(const FFState& o) const
 //  dright: if the next phrase follows the conditioning phrase and other stuff comes in between
 //  dleft:  if the next phrase precedes the conditioning phrase and other stuff comes in between
 
-LexicalReorderingState* HierarchicalReorderingForwardState::Expand(const TranslationOption& topt, Scores& scores) const
+LexicalReorderingState* HierarchicalReorderingForwardState::Expand(const TranslationOption& topt, ScoreComponentCollection* scores) const
 {
   const LexicalReorderingConfiguration::ModelType modelType = m_configuration.GetModelType();
   const WordsRange currWordsRange = topt.GetSourceWordsRange();
@@ -449,7 +441,7 @@ LexicalReorderingState* HierarchicalReorderingForwardState::Expand(const Transla
   ReorderingType reoType;
 
   if (m_first) {
-    ClearScores(scores);
+
   } else {
     if (modelType == LexicalReorderingConfiguration::MSD) {
       reoType = GetOrientationTypeMSD(currWordsRange, coverage);
diff --git a/moses/FF/LexicalReordering/LexicalReorderingState.h b/moses/FF/LexicalReordering/LexicalReorderingState.h
index 5c179c39cf..e8d9269b87 100644
--- a/moses/FF/LexicalReordering/LexicalReorderingState.h
+++ b/moses/FF/LexicalReordering/LexicalReorderingState.h
@@ -8,6 +8,7 @@
 
 #include "moses/Hypothesis.h"
 //#include "LexicalReordering.h"
+#include "moses/ScoreComponentCollection.h"
 #include "moses/WordsRange.h"
 #include "moses/WordsBitmap.h"
 #include "moses/TranslationOption.h"
@@ -91,7 +92,7 @@ class LexicalReorderingState : public FFState
 {
 public:
   virtual int Compare(const FFState& o) const = 0;
-  virtual LexicalReorderingState* Expand(const TranslationOption& hypo, Scores& scores) const = 0;
+  virtual LexicalReorderingState* Expand(const TranslationOption& hypo, ScoreComponentCollection* scores) const = 0;
 
   static LexicalReorderingState* CreateLexicalReorderingState(const std::vector<std::string>& config,
       LexicalReorderingConfiguration::Direction dir, const InputType &input);
@@ -115,8 +116,7 @@ class LexicalReorderingState : public FFState
     : m_configuration(config), m_direction(dir), m_offset(offset), m_prevOption(NULL) {}
 
   // copy the right scores in the right places, taking into account forward/backward, offset, collapse
-  void CopyScores(Scores& scores, const TranslationOption& topt, ReorderingType reoType) const;
-  void ClearScores(Scores& scores) const;
+  void CopyScores(ScoreComponentCollection* scores, const TranslationOption& topt, ReorderingType reoType) const;
   int ComparePrevScores(const TranslationOption *other) const;
 
   //constants for the different type of reorderings (corresponding to indexes in the table file)
@@ -146,7 +146,7 @@ class BidirectionalReorderingState : public LexicalReorderingState
   }
 
   virtual int Compare(const FFState& o) const;
-  virtual LexicalReorderingState* Expand(const TranslationOption& topt, Scores& scores) const;
+  virtual LexicalReorderingState* Expand(const TranslationOption& topt, ScoreComponentCollection*  scores) const;
 };
 
 //! State for the standard Moses implementation of lexical reordering models
@@ -162,7 +162,7 @@ class PhraseBasedReorderingState : public LexicalReorderingState
   PhraseBasedReorderingState(const PhraseBasedReorderingState *prev, const TranslationOption &topt);
 
   virtual int Compare(const FFState& o) const;
-  virtual LexicalReorderingState* Expand(const TranslationOption& topt, Scores& scores) const;
+  virtual LexicalReorderingState* Expand(const TranslationOption& topt, ScoreComponentCollection*  scores) const;
 
   ReorderingType GetOrientationTypeMSD(WordsRange currRange) const;
   ReorderingType GetOrientationTypeMSLR(WordsRange currRange) const;
@@ -183,7 +183,7 @@ class HierarchicalReorderingBackwardState : public LexicalReorderingState
                                       const TranslationOption &topt, ReorderingStack reoStack);
 
   virtual int Compare(const FFState& o) const;
-  virtual LexicalReorderingState* Expand(const TranslationOption& hypo, Scores& scores) const;
+  virtual LexicalReorderingState* Expand(const TranslationOption& hypo, ScoreComponentCollection*  scores) const;
 
 private:
   ReorderingType GetOrientationTypeMSD(int reoDistance) const;
@@ -206,7 +206,7 @@ class HierarchicalReorderingForwardState : public LexicalReorderingState
   HierarchicalReorderingForwardState(const HierarchicalReorderingForwardState *prev, const TranslationOption &topt);
 
   virtual int Compare(const FFState& o) const;
-  virtual LexicalReorderingState* Expand(const TranslationOption& hypo, Scores& scores) const;
+  virtual LexicalReorderingState* Expand(const TranslationOption& hypo, ScoreComponentCollection* scores) const;
 
 private:
   ReorderingType GetOrientationTypeMSD(WordsRange currRange, WordsBitmap coverage) const;

From 3dec0abf0acdf7699e9643650ccae22f5a4f5785 Mon Sep 17 00:00:00 2001
From: Barry Haddow <barry.haddow@gmail.com>
Date: Fri, 6 Jun 2014 21:08:09 +0100
Subject: [PATCH 20/84] Call sparse reordering

---
 .../LexicalReorderingState.cpp                | 36 ++++++++++---------
 .../LexicalReorderingState.h                  |  4 +++
 .../FF/LexicalReordering/SparseReordering.cpp |  5 +--
 moses/FF/LexicalReordering/SparseReordering.h |  7 ++--
 4 files changed, 30 insertions(+), 22 deletions(-)

diff --git a/moses/FF/LexicalReordering/LexicalReorderingState.cpp b/moses/FF/LexicalReordering/LexicalReorderingState.cpp
index c6782974ba..63f47c8855 100644
--- a/moses/FF/LexicalReordering/LexicalReorderingState.cpp
+++ b/moses/FF/LexicalReordering/LexicalReorderingState.cpp
@@ -134,24 +134,26 @@ void LexicalReorderingState::CopyScores(ScoreComponentCollection*  accum, const
   // don't call this on a bidirectional object
   UTIL_THROW_IF2(m_direction != LexicalReorderingConfiguration::Backward && m_direction != LexicalReorderingConfiguration::Forward,
 		  "Unknown direction: " << m_direction);
-  const Scores *cachedScores = (m_direction == LexicalReorderingConfiguration::Backward) ?
-                               topt.GetLexReorderingScores(m_configuration.GetScoreProducer()) :
-                               m_prevOption->GetLexReorderingScores(m_configuration.GetScoreProducer());
-
-  // No scores available. TODO: Using a good prior distribution would be nicer.
-  if(cachedScores == NULL)
-    return;
-
-  Scores scores(m_configuration.GetScoreProducer()->GetNumScoreComponents(),0);
-
-  const Scores &scoreSet = *cachedScores;
-  if(m_configuration.CollapseScores())
-    scores[m_offset] = scoreSet[m_offset + reoType];
-  else {
-    std::fill(scores.begin() + m_offset, scores.begin() + m_offset + m_configuration.GetNumberOfTypes(), 0);
-    scores[m_offset + reoType] = scoreSet[m_offset + reoType];
+  const TranslationOption* relevantOpt = &topt;
+  if (m_direction != LexicalReorderingConfiguration::Backward) relevantOpt = m_prevOption;
+  const Scores *cachedScores = relevantOpt->GetLexReorderingScores(m_configuration.GetScoreProducer());
+
+  if(cachedScores) {
+    Scores scores(m_configuration.GetScoreProducer()->GetNumScoreComponents(),0);
+
+    const Scores &scoreSet = *cachedScores;
+    if(m_configuration.CollapseScores())
+      scores[m_offset] = scoreSet[m_offset + reoType];
+    else {
+      std::fill(scores.begin() + m_offset, scores.begin() + m_offset + m_configuration.GetNumberOfTypes(), 0);
+      scores[m_offset + reoType] = scoreSet[m_offset + reoType];
+    }
+    accum->PlusEquals(m_configuration.GetScoreProducer(), scores);
   }
-  accum->PlusEquals(m_configuration.GetScoreProducer(), scores);
+
+  const SparseReordering* sparse = m_configuration.GetSparseReordering();
+  if (sparse) sparse->CopyScores(*relevantOpt, reoType, m_direction, accum);
+
 }
 
 
diff --git a/moses/FF/LexicalReordering/LexicalReorderingState.h b/moses/FF/LexicalReordering/LexicalReorderingState.h
index e8d9269b87..058ae01c40 100644
--- a/moses/FF/LexicalReordering/LexicalReorderingState.h
+++ b/moses/FF/LexicalReordering/LexicalReorderingState.h
@@ -67,6 +67,10 @@ class LexicalReorderingConfiguration
     return m_collapseScores;
   }
 
+  const SparseReordering* GetSparseReordering() const {
+    return m_sparse.get();
+  }
+
 private:
   void SetScoreProducer(LexicalReordering* scoreProducer) {
     m_scoreProducer = scoreProducer;
diff --git a/moses/FF/LexicalReordering/SparseReordering.cpp b/moses/FF/LexicalReordering/SparseReordering.cpp
index 0405ff29ed..14a3b2667c 100644
--- a/moses/FF/LexicalReordering/SparseReordering.cpp
+++ b/moses/FF/LexicalReordering/SparseReordering.cpp
@@ -50,12 +50,13 @@ void SparseReordering::ReadWordList(const string& filename, const string& id, ve
   }
 }
 
-void SparseReordering::AddScores(
-              const TranslationOption& topt,
+void SparseReordering::CopyScores(
+               const TranslationOption& topt,
                LexicalReorderingState::ReorderingType reoType,
                LexicalReorderingConfiguration::Direction direction,
                ScoreComponentCollection* scores) const 
 {
+  //std::cerr << "SR " << topt << " " << reoType << " " << direction << std::endl;
 }
 
 } //namespace
diff --git a/moses/FF/LexicalReordering/SparseReordering.h b/moses/FF/LexicalReordering/SparseReordering.h
index a6db663e0f..0a5803e1bf 100644
--- a/moses/FF/LexicalReordering/SparseReordering.h
+++ b/moses/FF/LexicalReordering/SparseReordering.h
@@ -20,9 +20,9 @@
   sparse-words-(source|target)-<id>=<filename>  -- Features which fire for the words in the list
   sparse-clusters-(source|target)-<id>=<filename> -- Features which fire for clusters in the list. Format
                                      of cluster file TBD
-  sparse-phrase                    -- Add features which depend on the current phrase
+  sparse-phrase                    -- Add features which depend on the current phrase (backward)
   sparse-stack                     -- Add features which depend on the previous phrase, or
-                                      top of stack.
+                                      top of stack. (forward)
   sparse-between                   -- Add features which depend on words between previous phrase
                                       (or top of stack) and current phrase.
 **/
@@ -34,7 +34,8 @@ class SparseReordering
 public:
   SparseReordering(const std::map<std::string,std::string>& config);
   
-  void AddScores(const TranslationOption& topt,
+  //If direction is backward topt is the current option, otherwise the previous
+  void CopyScores(const TranslationOption& topt,
                  LexicalReorderingState::ReorderingType reoType,
                  LexicalReorderingConfiguration::Direction direction,
                  ScoreComponentCollection* scores) const ;

From a5e5a6590b0d3a3a92b183e3cf7b3401ecf18589 Mon Sep 17 00:00:00 2001
From: Barry Haddow <barry.haddow@gmail.com>
Date: Mon, 9 Jun 2014 22:17:05 +0100
Subject: [PATCH 21/84] basic implementation of non-hierarchical sparse
 features

---
 .../LexicalReordering/LexicalReordering.cpp   |  2 +-
 .../LexicalReorderingState.cpp                |  5 +-
 .../LexicalReorderingState.h                  |  2 +-
 .../FF/LexicalReordering/SparseReordering.cpp | 49 ++++++++++++++++++-
 moses/FF/LexicalReordering/SparseReordering.h | 11 ++++-
 5 files changed, 61 insertions(+), 8 deletions(-)

diff --git a/moses/FF/LexicalReordering/LexicalReordering.cpp b/moses/FF/LexicalReordering/LexicalReordering.cpp
index 6c73863600..d3e52c23c3 100644
--- a/moses/FF/LexicalReordering/LexicalReordering.cpp
+++ b/moses/FF/LexicalReordering/LexicalReordering.cpp
@@ -52,7 +52,7 @@ LexicalReordering::LexicalReordering(const std::string &line)
     throw "Unknown conditioning option!";
   }
 
-  m_configuration->ConfigureSparse(sparseArgs);
+  m_configuration->ConfigureSparse(sparseArgs, this);
 }
 
 LexicalReordering::~LexicalReordering()
diff --git a/moses/FF/LexicalReordering/LexicalReorderingState.cpp b/moses/FF/LexicalReordering/LexicalReorderingState.cpp
index 63f47c8855..349f06af76 100644
--- a/moses/FF/LexicalReordering/LexicalReorderingState.cpp
+++ b/moses/FF/LexicalReordering/LexicalReorderingState.cpp
@@ -38,10 +38,11 @@ size_t LexicalReorderingConfiguration::GetNumScoreComponents() const
   }
 }
 
-void LexicalReorderingConfiguration::ConfigureSparse(const std::map<std::string,std::string>& sparseArgs) 
+void LexicalReorderingConfiguration::ConfigureSparse
+  (const std::map<std::string,std::string>& sparseArgs, const LexicalReordering* producer) 
 {
   if (sparseArgs.size()) {
-    m_sparse.reset(new SparseReordering(sparseArgs));
+    m_sparse.reset(new SparseReordering(sparseArgs, producer));
   }
 }
 
diff --git a/moses/FF/LexicalReordering/LexicalReorderingState.h b/moses/FF/LexicalReordering/LexicalReorderingState.h
index 058ae01c40..e37ea71783 100644
--- a/moses/FF/LexicalReordering/LexicalReorderingState.h
+++ b/moses/FF/LexicalReordering/LexicalReorderingState.h
@@ -34,7 +34,7 @@ class LexicalReorderingConfiguration
 
   LexicalReorderingConfiguration(const std::string &modelType);
 
-  void ConfigureSparse(const std::map<std::string,std::string>& sparseArgs);
+  void ConfigureSparse(const std::map<std::string,std::string>& sparseArgs, const LexicalReordering* producer);
 
   LexicalReorderingState *CreateLexicalReorderingState(const InputType &input) const;
 
diff --git a/moses/FF/LexicalReordering/SparseReordering.cpp b/moses/FF/LexicalReordering/SparseReordering.cpp
index 14a3b2667c..443cf49d96 100644
--- a/moses/FF/LexicalReordering/SparseReordering.cpp
+++ b/moses/FF/LexicalReordering/SparseReordering.cpp
@@ -1,8 +1,11 @@
 #include <fstream>
 
+#include "moses/FactorCollection.h"
+#include "moses/InputPath.h"
 #include "moses/Util.h"
 #include "util/exception.hh"
 
+#include "LexicalReordering.h"
 #include "SparseReordering.h"
 
 
@@ -11,7 +14,8 @@ using namespace std;
 namespace Moses 
 {
 
-SparseReordering::SparseReordering(const map<string,string>& config) 
+SparseReordering::SparseReordering(const map<string,string>& config, const LexicalReordering* producer)
+  : m_producer(producer) 
 {
   static const string kSource= "source";
   static const string kTarget = "target";
@@ -42,14 +46,32 @@ SparseReordering::SparseReordering(const map<string,string>& config)
 
 void SparseReordering::ReadWordList(const string& filename, const string& id, vector<WordList>* pWordLists) {
   ifstream fh(filename.c_str());
+  UTIL_THROW_IF(!fh, util::Exception, "Unable to open: " << filename);
   string line;
   pWordLists->push_back(WordList());
   pWordLists->back().first = id;
   while (getline(fh,line)) {
-    pWordLists->back().second.insert(line);
+    //TODO: StringPiece
+    const Factor* factor = FactorCollection::Instance().AddFactor(line);
+    pWordLists->back().second.insert(factor);
   }
 }
 
+void SparseReordering::AddFeatures(
+    const string& type, const Word& word, const string& position,  const WordList& words,
+    LexicalReorderingState::ReorderingType reoType,
+    ScoreComponentCollection* scores) const {
+
+  //TODO: Precalculate all feature names
+  static string kSep = "-";
+  const Factor*  wordFactor = word.GetFactor(0);
+  if (words.second.find(wordFactor) == words.second.end()) return;
+  ostringstream buf;
+  buf  << type << kSep << position << kSep << words.first << kSep << wordFactor->GetString() << kSep << reoType;
+  scores->PlusEquals(m_producer, buf.str(), 1.0);
+
+}
+
 void SparseReordering::CopyScores(
                const TranslationOption& topt,
                LexicalReorderingState::ReorderingType reoType,
@@ -57,6 +79,29 @@ void SparseReordering::CopyScores(
                ScoreComponentCollection* scores) const 
 {
   //std::cerr << "SR " << topt << " " << reoType << " " << direction << std::endl;
+  const string kPhrase = "phr"; //phrase (backward)
+  const string kStack = "stk"; //stack (forward)
+
+  const string* type = &kPhrase;
+  //TODO: bidirectional?
+  if (direction == LexicalReorderingConfiguration::Forward) {
+    if (!m_useStack) return;
+    type = &kStack;
+  } else if (direction == LexicalReorderingConfiguration::Backward && !m_usePhrase) {
+    return;
+  }
+  for (vector<WordList>::const_iterator i = m_sourceWordLists.begin(); i != m_sourceWordLists.end(); ++i) {
+    const Phrase& sourcePhrase = topt.GetInputPath().GetPhrase();
+    AddFeatures(*type, sourcePhrase.GetWord(0), "src.first", *i, reoType, scores);
+    AddFeatures(*type, sourcePhrase.GetWord(sourcePhrase.GetSize()-1), "src.last", *i, reoType, scores);
+  }
+  for (vector<WordList>::const_iterator i = m_targetWordLists.begin(); i != m_targetWordLists.end(); ++i) {
+    const Phrase& targetPhrase = topt.GetTargetPhrase();   
+    AddFeatures(*type, targetPhrase.GetWord(0), "tgt.first", *i, reoType, scores);
+    AddFeatures(*type, targetPhrase.GetWord(targetPhrase.GetSize()-1), "tgt.last", *i, reoType, scores);
+  }
+
+
 }
 
 } //namespace
diff --git a/moses/FF/LexicalReordering/SparseReordering.h b/moses/FF/LexicalReordering/SparseReordering.h
index 0a5803e1bf..ec6c3c04e3 100644
--- a/moses/FF/LexicalReordering/SparseReordering.h
+++ b/moses/FF/LexicalReordering/SparseReordering.h
@@ -10,6 +10,8 @@
 #include <string>
 #include <vector>
 
+#include <boost/unordered_set.hpp>
+
 #include "moses/ScoreComponentCollection.h"
 #include "LexicalReorderingState.h"
 
@@ -32,7 +34,7 @@ namespace Moses
 class SparseReordering
 {
 public:
-  SparseReordering(const std::map<std::string,std::string>& config);
+  SparseReordering(const std::map<std::string,std::string>& config, const LexicalReordering* producer);
   
   //If direction is backward topt is the current option, otherwise the previous
   void CopyScores(const TranslationOption& topt,
@@ -41,7 +43,8 @@ class SparseReordering
                  ScoreComponentCollection* scores) const ;
 
 private:
-  typedef std::pair<std::string, std::set<std::string> > WordList; //id and list
+  const LexicalReordering* m_producer;
+  typedef std::pair<std::string, boost::unordered_set<const Factor*> > WordList; //id and list
   std::vector<WordList> m_sourceWordLists;
   std::vector<WordList> m_targetWordLists;
   bool m_usePhrase;
@@ -49,6 +52,10 @@ class SparseReordering
   bool m_useStack;
 
   void ReadWordList(const std::string& filename, const std::string& id, std::vector<WordList>* pWordLists);
+  void AddFeatures(
+    const std::string& type, const Word& word, const std::string& position, const WordList& words,
+    LexicalReorderingState::ReorderingType reoType,
+    ScoreComponentCollection* scores) const;
 
 };
 

From 91ccf8ef72356f1f61c42b3e0dd9a56aac32c451 Mon Sep 17 00:00:00 2001
From: Barry Haddow <barry.haddow@gmail.com>
Date: Tue, 10 Jun 2014 10:23:48 +0100
Subject: [PATCH 22/84] bidirectional case

---
 moses/FF/LexicalReordering/SparseReordering.cpp | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/moses/FF/LexicalReordering/SparseReordering.cpp b/moses/FF/LexicalReordering/SparseReordering.cpp
index 443cf49d96..677698400f 100644
--- a/moses/FF/LexicalReordering/SparseReordering.cpp
+++ b/moses/FF/LexicalReordering/SparseReordering.cpp
@@ -82,13 +82,16 @@ void SparseReordering::CopyScores(
   const string kPhrase = "phr"; //phrase (backward)
   const string kStack = "stk"; //stack (forward)
 
-  const string* type = &kPhrase;
-  //TODO: bidirectional?
+  const string* type = NULL;// &kPhrase;
   if (direction == LexicalReorderingConfiguration::Forward) {
     if (!m_useStack) return;
     type = &kStack;
-  } else if (direction == LexicalReorderingConfiguration::Backward && !m_usePhrase) {
-    return;
+  } else if (direction == LexicalReorderingConfiguration::Backward) {
+    if (!m_usePhrase) return;
+    type = &kPhrase;
+  } else {
+    //Shouldn't be called for bidirectional
+    assert(!"Shouldn't call CopyScores() with bidirectional direction");
   }
   for (vector<WordList>::const_iterator i = m_sourceWordLists.begin(); i != m_sourceWordLists.end(); ++i) {
     const Phrase& sourcePhrase = topt.GetInputPath().GetPhrase();

From 2785989e219d217bcf02927506ef12ff43100aaa Mon Sep 17 00:00:00 2001
From: Barry Haddow <barry.haddow@gmail.com>
Date: Thu, 12 Jun 2014 21:37:18 +0100
Subject: [PATCH 23/84] precalculation of feature names

---
 .../LexicalReorderingState.h                  |  2 +
 .../FF/LexicalReordering/SparseReordering.cpp | 98 +++++++++++++++----
 moses/FF/LexicalReordering/SparseReordering.h | 62 +++++++++++-
 3 files changed, 137 insertions(+), 25 deletions(-)

diff --git a/moses/FF/LexicalReordering/LexicalReorderingState.h b/moses/FF/LexicalReordering/LexicalReorderingState.h
index e37ea71783..cf91eaf69b 100644
--- a/moses/FF/LexicalReordering/LexicalReorderingState.h
+++ b/moses/FF/LexicalReordering/LexicalReorderingState.h
@@ -132,6 +132,8 @@ class LexicalReorderingState : public FFState
   static const ReorderingType DR = 3; // discontinuous, right
   static const ReorderingType R = 0;  // right
   static const ReorderingType L = 1;  // left
+  public:
+  static const ReorderingType MAX = 3; //largest possible
 };
 
 //! @todo what is this?
diff --git a/moses/FF/LexicalReordering/SparseReordering.cpp b/moses/FF/LexicalReordering/SparseReordering.cpp
index 677698400f..f1a334ece8 100644
--- a/moses/FF/LexicalReordering/SparseReordering.cpp
+++ b/moses/FF/LexicalReordering/SparseReordering.cpp
@@ -14,6 +14,40 @@ using namespace std;
 namespace Moses 
 {
 
+const std::string& SparseReorderingFeatureKey::Name(const string& wordListId) {
+  static string kSep = "-";
+  static string name;
+  ostringstream buf;
+  // type side position id word reotype
+  if (type == Phrase) {
+    buf << "phr";
+  } else if (type == Stack) {
+    buf << "stk";
+  } else if (type == Between) {
+    buf << "btn";
+  }
+  buf << kSep;
+  if (side == Source) {
+    buf << "src";
+  } else if (side == Target) {
+    buf << "tgt";
+  }
+  buf << kSep;
+  if (position == First) {
+    buf << "first";
+  } else if (position == Last) {
+    buf << "last";
+  }
+  buf << kSep;
+  buf << wordListId;
+  buf << kSep;
+  buf << word->GetString();
+  buf << kSep;
+  buf << reoType;
+  name = buf.str();
+  return name;
+}
+
 SparseReordering::SparseReordering(const map<string,string>& config, const LexicalReordering* producer)
   : m_producer(producer) 
 {
@@ -24,9 +58,9 @@ SparseReordering::SparseReordering(const map<string,string>& config, const Lexic
     if (fields[0] == "words") {
       UTIL_THROW_IF(!(fields.size() == 3), util::Exception, "Sparse reordering word list name should be sparse-words-(source|target)-<id>");
       if (fields[1] == kSource) {
-        ReadWordList(i->second,fields[2],&m_sourceWordLists);
+        ReadWordList(i->second,fields[2], SparseReorderingFeatureKey::Source, &m_sourceWordLists);
       } else if (fields[1] == kTarget) {
-        ReadWordList(i->second,fields[2],&m_targetWordLists);
+        ReadWordList(i->second,fields[2],SparseReorderingFeatureKey::Target, &m_targetWordLists);
       } else {
         UTIL_THROW(util::Exception, "Sparse reordering requires source or target, not " << fields[1]);
       }
@@ -42,9 +76,10 @@ SparseReordering::SparseReordering(const map<string,string>& config, const Lexic
       UTIL_THROW(util::Exception, "Unable to parse sparse reordering option: " << i->first);
     }
   }
+
 }
 
-void SparseReordering::ReadWordList(const string& filename, const string& id, vector<WordList>* pWordLists) {
+void SparseReordering::ReadWordList(const string& filename, const string& id, SparseReorderingFeatureKey::Side side, vector<WordList>* pWordLists) {
   ifstream fh(filename.c_str());
   UTIL_THROW_IF(!fh, util::Exception, "Unable to open: " << filename);
   string line;
@@ -54,21 +89,37 @@ void SparseReordering::ReadWordList(const string& filename, const string& id, ve
     //TODO: StringPiece
     const Factor* factor = FactorCollection::Instance().AddFactor(line);
     pWordLists->back().second.insert(factor);
+    //Pre-calculate feature names.
+    for (size_t type = SparseReorderingFeatureKey::Stack;
+                       type <= SparseReorderingFeatureKey::Between; ++type) {
+      for (size_t position = SparseReorderingFeatureKey::First;
+                       position <= SparseReorderingFeatureKey::Last; ++position) {
+        for (int reoType = 0; reoType < LexicalReorderingState::MAX; ++reoType) {
+          SparseReorderingFeatureKey key(
+            pWordLists->size()-1, static_cast<SparseReorderingFeatureKey::Type>(type),
+            factor, static_cast<SparseReorderingFeatureKey::Position>(position), side, reoType);
+          m_featureMap[key] = key.Name(id);
+        }
+      }
+    }
+
   }
 }
 
-void SparseReordering::AddFeatures(
-    const string& type, const Word& word, const string& position,  const WordList& words,
-    LexicalReorderingState::ReorderingType reoType,
+void SparseReordering::AddFeatures(size_t id,
+    SparseReorderingFeatureKey::Type type, SparseReorderingFeatureKey::Side side,
+    const Word& word, SparseReorderingFeatureKey::Position position,
+    const WordList& words, LexicalReorderingState::ReorderingType reoType,
     ScoreComponentCollection* scores) const {
 
   //TODO: Precalculate all feature names
   static string kSep = "-";
   const Factor*  wordFactor = word.GetFactor(0);
   if (words.second.find(wordFactor) == words.second.end()) return;
-  ostringstream buf;
-  buf  << type << kSep << position << kSep << words.first << kSep << wordFactor->GetString() << kSep << reoType;
-  scores->PlusEquals(m_producer, buf.str(), 1.0);
+  SparseReorderingFeatureKey key(id, type, wordFactor, position, side, reoType);
+  FeatureMap::const_iterator fmi = m_featureMap.find(key);
+  assert(fmi != m_featureMap.end());
+  scores->PlusEquals(m_producer, fmi->second, 1.0);
 
 }
 
@@ -79,29 +130,34 @@ void SparseReordering::CopyScores(
                ScoreComponentCollection* scores) const 
 {
   //std::cerr << "SR " << topt << " " << reoType << " " << direction << std::endl;
-  const string kPhrase = "phr"; //phrase (backward)
-  const string kStack = "stk"; //stack (forward)
-
-  const string* type = NULL;// &kPhrase;
+  //phrase (backward)
+  //stack (forward)
+  SparseReorderingFeatureKey::Type type;
   if (direction == LexicalReorderingConfiguration::Forward) {
     if (!m_useStack) return;
-    type = &kStack;
+    type = SparseReorderingFeatureKey::Stack;
   } else if (direction == LexicalReorderingConfiguration::Backward) {
     if (!m_usePhrase) return;
-    type = &kPhrase;
+    type = SparseReorderingFeatureKey::Phrase;
   } else {
     //Shouldn't be called for bidirectional
+    //keep compiler happy
+    type = SparseReorderingFeatureKey::Phrase;
     assert(!"Shouldn't call CopyScores() with bidirectional direction");
   }
-  for (vector<WordList>::const_iterator i = m_sourceWordLists.begin(); i != m_sourceWordLists.end(); ++i) {
+  for (size_t i = 0; i < m_sourceWordLists.size(); ++i) {
     const Phrase& sourcePhrase = topt.GetInputPath().GetPhrase();
-    AddFeatures(*type, sourcePhrase.GetWord(0), "src.first", *i, reoType, scores);
-    AddFeatures(*type, sourcePhrase.GetWord(sourcePhrase.GetSize()-1), "src.last", *i, reoType, scores);
+    AddFeatures(i, type, SparseReorderingFeatureKey::Source, sourcePhrase.GetWord(0),
+      SparseReorderingFeatureKey::First, m_sourceWordLists[i], reoType, scores);
+    AddFeatures(i, type, SparseReorderingFeatureKey::Source, sourcePhrase.GetWord(sourcePhrase.GetSize()-1),
+      SparseReorderingFeatureKey::Last, m_sourceWordLists[i], reoType, scores);
   }
-  for (vector<WordList>::const_iterator i = m_targetWordLists.begin(); i != m_targetWordLists.end(); ++i) {
+  for (size_t i = 0; i < m_sourceWordLists.size(); ++i) {
     const Phrase& targetPhrase = topt.GetTargetPhrase();   
-    AddFeatures(*type, targetPhrase.GetWord(0), "tgt.first", *i, reoType, scores);
-    AddFeatures(*type, targetPhrase.GetWord(targetPhrase.GetSize()-1), "tgt.last", *i, reoType, scores);
+    AddFeatures(i, type, SparseReorderingFeatureKey::Target, targetPhrase.GetWord(0),
+      SparseReorderingFeatureKey::First, m_targetWordLists[i], reoType, scores);
+    AddFeatures(i, type, SparseReorderingFeatureKey::Target, targetPhrase.GetWord(targetPhrase.GetSize()-1),
+      SparseReorderingFeatureKey::Last, m_targetWordLists[i], reoType, scores);
   }
 
 
diff --git a/moses/FF/LexicalReordering/SparseReordering.h b/moses/FF/LexicalReordering/SparseReordering.h
index ec6c3c04e3..76c72a2019 100644
--- a/moses/FF/LexicalReordering/SparseReordering.h
+++ b/moses/FF/LexicalReordering/SparseReordering.h
@@ -6,12 +6,17 @@
 **/
 
 
+#include <functional>
 #include <map>
 #include <string>
 #include <vector>
 
 #include <boost/unordered_set.hpp>
 
+#include "util/murmur_hash.hh"
+#include "util/pool.hh"
+#include "util/string_piece.hh"
+
 #include "moses/ScoreComponentCollection.h"
 #include "LexicalReorderingState.h"
 
@@ -31,6 +36,51 @@
 
 namespace Moses
 {
+
+/** 
+ * Used to store pre-calculated feature names.
+**/
+struct SparseReorderingFeatureKey {
+  size_t id;
+  enum Type {Stack, Phrase, Between} type;
+  const Factor* word;
+  enum Position {First, Last} position;
+  enum Side {Source, Target} side;
+  LexicalReorderingState::ReorderingType reoType;
+
+  SparseReorderingFeatureKey(size_t id_, Type type_, const Factor* word_, Position position_, 
+        Side side_, LexicalReorderingState::ReorderingType reoType_) 
+    : id(id_), type(type_), word(word_), position(position_), side(side_), reoType(reoType_)     
+  {}
+
+  const std::string& Name(const std::string& wordListId) ; 
+};
+
+struct HashSparseReorderingFeatureKey : public std::unary_function<SparseReorderingFeatureKey, std::size_t> {
+  std::size_t operator()(const SparseReorderingFeatureKey& key) const {
+    //TODO: can we just hash the memory? 
+    //not sure, there could be random padding
+    std::size_t seed = 0;
+    seed = util::MurmurHashNative(&key.id, sizeof(key.id), seed);
+    seed = util::MurmurHashNative(&key.type, sizeof(key.type), seed);
+    seed = util::MurmurHashNative(&key.word, sizeof(key.word), seed);
+    seed = util::MurmurHashNative(&key.position, sizeof(key.position), seed);
+    seed = util::MurmurHashNative(&key.side, sizeof(key.side), seed);
+    seed = util::MurmurHashNative(&key.reoType, sizeof(key.reoType), seed);
+    return seed;
+  }
+};
+
+struct EqualsSparseReorderingFeatureKey :
+   public std::binary_function<SparseReorderingFeatureKey, SparseReorderingFeatureKey, bool> {
+  bool operator()(const SparseReorderingFeatureKey& left, const SparseReorderingFeatureKey& right) const {
+    //TODO: Can we just compare the memory?
+    return left.id == right.id &&  left.type == right.type && left.word == right.word &&
+           left.position == right.position && left.side == right.side &&
+           left.reoType == right.reoType;
+  }
+};
+
 class SparseReordering
 {
 public:
@@ -50,11 +100,15 @@ class SparseReordering
   bool m_usePhrase;
   bool m_useBetween;
   bool m_useStack;
+  typedef boost::unordered_map<SparseReorderingFeatureKey, std::string, HashSparseReorderingFeatureKey, EqualsSparseReorderingFeatureKey> FeatureMap;
+  FeatureMap m_featureMap;
 
-  void ReadWordList(const std::string& filename, const std::string& id, std::vector<WordList>* pWordLists);
-  void AddFeatures(
-    const std::string& type, const Word& word, const std::string& position, const WordList& words,
-    LexicalReorderingState::ReorderingType reoType,
+  void ReadWordList(const std::string& filename, const std::string& id,
+       SparseReorderingFeatureKey::Side side, std::vector<WordList>* pWordLists);
+  void AddFeatures(size_t id,
+    SparseReorderingFeatureKey::Type type, SparseReorderingFeatureKey::Side side,
+     const Word& word, SparseReorderingFeatureKey::Position position,
+     const WordList& words, LexicalReorderingState::ReorderingType reoType,
     ScoreComponentCollection* scores) const;
 
 };

From 1afa0bc8f6c463e163f4d35d2bd5586f6397d883 Mon Sep 17 00:00:00 2001
From: Barry Haddow <barry.haddow@gmail.com>
Date: Mon, 23 Jun 2014 17:06:00 +0100
Subject: [PATCH 24/84] Fix for hreo

---
 moses/FF/LexicalReordering/SparseReordering.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/moses/FF/LexicalReordering/SparseReordering.cpp b/moses/FF/LexicalReordering/SparseReordering.cpp
index f1a334ece8..8a61f52151 100644
--- a/moses/FF/LexicalReordering/SparseReordering.cpp
+++ b/moses/FF/LexicalReordering/SparseReordering.cpp
@@ -94,7 +94,7 @@ void SparseReordering::ReadWordList(const string& filename, const string& id, Sp
                        type <= SparseReorderingFeatureKey::Between; ++type) {
       for (size_t position = SparseReorderingFeatureKey::First;
                        position <= SparseReorderingFeatureKey::Last; ++position) {
-        for (int reoType = 0; reoType < LexicalReorderingState::MAX; ++reoType) {
+        for (int reoType = 0; reoType <= LexicalReorderingState::MAX; ++reoType) {
           SparseReorderingFeatureKey key(
             pWordLists->size()-1, static_cast<SparseReorderingFeatureKey::Type>(type),
             factor, static_cast<SparseReorderingFeatureKey::Position>(position), side, reoType);
@@ -112,7 +112,6 @@ void SparseReordering::AddFeatures(size_t id,
     const WordList& words, LexicalReorderingState::ReorderingType reoType,
     ScoreComponentCollection* scores) const {
 
-  //TODO: Precalculate all feature names
   static string kSep = "-";
   const Factor*  wordFactor = word.GetFactor(0);
   if (words.second.find(wordFactor) == words.second.end()) return;

From cc426190e2d11c93eb5cf6da5219a9ae39cc97c5 Mon Sep 17 00:00:00 2001
From: Barry Haddow <barry.haddow@gmail.com>
Date: Mon, 23 Jun 2014 17:46:45 +0100
Subject: [PATCH 25/84] Minor fix

---
 moses/FF/LexicalReordering/SparseReordering.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/moses/FF/LexicalReordering/SparseReordering.cpp b/moses/FF/LexicalReordering/SparseReordering.cpp
index 8a61f52151..426aad1830 100644
--- a/moses/FF/LexicalReordering/SparseReordering.cpp
+++ b/moses/FF/LexicalReordering/SparseReordering.cpp
@@ -112,7 +112,6 @@ void SparseReordering::AddFeatures(size_t id,
     const WordList& words, LexicalReorderingState::ReorderingType reoType,
     ScoreComponentCollection* scores) const {
 
-  static string kSep = "-";
   const Factor*  wordFactor = word.GetFactor(0);
   if (words.second.find(wordFactor) == words.second.end()) return;
   SparseReorderingFeatureKey key(id, type, wordFactor, position, side, reoType);
@@ -151,7 +150,7 @@ void SparseReordering::CopyScores(
     AddFeatures(i, type, SparseReorderingFeatureKey::Source, sourcePhrase.GetWord(sourcePhrase.GetSize()-1),
       SparseReorderingFeatureKey::Last, m_sourceWordLists[i], reoType, scores);
   }
-  for (size_t i = 0; i < m_sourceWordLists.size(); ++i) {
+  for (size_t i = 0; i < m_targetWordLists.size(); ++i) {
     const Phrase& targetPhrase = topt.GetTargetPhrase();   
     AddFeatures(i, type, SparseReorderingFeatureKey::Target, targetPhrase.GetWord(0),
       SparseReorderingFeatureKey::First, m_targetWordLists[i], reoType, scores);

From 69222ee32bc1d8086c7a196150019ee5bbc1b447 Mon Sep 17 00:00:00 2001
From: Barry Haddow <barry.haddow@gmail.com>
Date: Tue, 24 Jun 2014 13:37:54 +0100
Subject: [PATCH 26/84] Implement 'between' features.

---
 .../LexicalReordering/LexicalReordering.cpp   |  2 +-
 .../LexicalReorderingState.cpp                | 22 +++++------
 .../LexicalReorderingState.h                  | 14 +++----
 .../FF/LexicalReordering/SparseReordering.cpp | 37 +++++++++++++++++--
 moses/FF/LexicalReordering/SparseReordering.h |  6 ++-
 5 files changed, 57 insertions(+), 24 deletions(-)

diff --git a/moses/FF/LexicalReordering/LexicalReordering.cpp b/moses/FF/LexicalReordering/LexicalReordering.cpp
index 4278af1a51..b0b18c65fa 100644
--- a/moses/FF/LexicalReordering/LexicalReordering.cpp
+++ b/moses/FF/LexicalReordering/LexicalReordering.cpp
@@ -75,7 +75,7 @@ FFState* LexicalReordering::Evaluate(const Hypothesis& hypo,
 {
   Scores score(GetNumScoreComponents(), 0);
   const LexicalReorderingState *prev = dynamic_cast<const LexicalReorderingState *>(prev_state);
-  LexicalReorderingState *next_state = prev->Expand(hypo.GetTranslationOption(), out);
+  LexicalReorderingState *next_state = prev->Expand(hypo.GetTranslationOption(), hypo.GetInput(), out);
 
   out->PlusEquals(this, score);
 
diff --git a/moses/FF/LexicalReordering/LexicalReorderingState.cpp b/moses/FF/LexicalReordering/LexicalReorderingState.cpp
index dfdc0ddb8f..fa88fdeab0 100644
--- a/moses/FF/LexicalReordering/LexicalReorderingState.cpp
+++ b/moses/FF/LexicalReordering/LexicalReorderingState.cpp
@@ -130,7 +130,7 @@ LexicalReorderingState *LexicalReorderingConfiguration::CreateLexicalReorderingS
   return new BidirectionalReorderingState(*this, bwd, fwd, 0);
 }
 
-void LexicalReorderingState::CopyScores(ScoreComponentCollection*  accum, const TranslationOption &topt, ReorderingType reoType) const
+void LexicalReorderingState::CopyScores(ScoreComponentCollection*  accum, const TranslationOption &topt, const InputType& input,  ReorderingType reoType) const
 {
   // don't call this on a bidirectional object
   UTIL_THROW_IF2(m_direction != LexicalReorderingConfiguration::Backward && m_direction != LexicalReorderingConfiguration::Forward,
@@ -153,7 +153,7 @@ void LexicalReorderingState::CopyScores(ScoreComponentCollection*  accum, const
   }
 
   const SparseReordering* sparse = m_configuration.GetSparseReordering();
-  if (sparse) sparse->CopyScores(*relevantOpt, reoType, m_direction, accum);
+  if (sparse) sparse->CopyScores(*relevantOpt, m_prevOption, input, reoType, m_direction, accum);
 
 }
 
@@ -210,7 +210,7 @@ int PhraseBasedReorderingState::Compare(const FFState& o) const
   return 1;
 }
 
-LexicalReorderingState* PhraseBasedReorderingState::Expand(const TranslationOption& topt, ScoreComponentCollection* scores) const
+LexicalReorderingState* PhraseBasedReorderingState::Expand(const TranslationOption& topt, const InputType& input,ScoreComponentCollection* scores) const
 {
   ReorderingType reoType;
   const WordsRange currWordsRange = topt.GetSourceWordsRange();
@@ -226,7 +226,7 @@ LexicalReorderingState* PhraseBasedReorderingState::Expand(const TranslationOpti
     } else {
       reoType = GetOrientationTypeLeftRight(currWordsRange);
     }
-    CopyScores(scores, topt, reoType);
+    CopyScores(scores, topt, input, reoType);
   }
 
   return new PhraseBasedReorderingState(this, topt);
@@ -304,10 +304,10 @@ int BidirectionalReorderingState::Compare(const FFState& o) const
     return m_forward->Compare(*other.m_forward);
 }
 
-LexicalReorderingState* BidirectionalReorderingState::Expand(const TranslationOption& topt,  ScoreComponentCollection* scores) const
+LexicalReorderingState* BidirectionalReorderingState::Expand(const TranslationOption& topt, const InputType& input, ScoreComponentCollection* scores) const
 {
-  LexicalReorderingState *newbwd = m_backward->Expand(topt, scores);
-  LexicalReorderingState *newfwd = m_forward->Expand(topt, scores);
+  LexicalReorderingState *newbwd = m_backward->Expand(topt,input, scores);
+  LexicalReorderingState *newfwd = m_forward->Expand(topt, input, scores);
   return new BidirectionalReorderingState(m_configuration, newbwd, newfwd, m_offset);
 }
 
@@ -328,7 +328,7 @@ int HierarchicalReorderingBackwardState::Compare(const FFState& o) const
   return m_reoStack.Compare(other.m_reoStack);
 }
 
-LexicalReorderingState* HierarchicalReorderingBackwardState::Expand(const TranslationOption& topt, ScoreComponentCollection*  scores) const
+LexicalReorderingState* HierarchicalReorderingBackwardState::Expand(const TranslationOption& topt, const InputType& input,ScoreComponentCollection*  scores) const
 {
 
   HierarchicalReorderingBackwardState* nextState = new HierarchicalReorderingBackwardState(this, topt, m_reoStack);
@@ -347,7 +347,7 @@ LexicalReorderingState* HierarchicalReorderingBackwardState::Expand(const Transl
     reoType = GetOrientationTypeMonotonic(reoDistance);
   }
 
-  CopyScores(scores, topt, reoType);
+  CopyScores(scores, topt, input, reoType);
   return nextState;
 }
 
@@ -431,7 +431,7 @@ int HierarchicalReorderingForwardState::Compare(const FFState& o) const
 //  dright: if the next phrase follows the conditioning phrase and other stuff comes in between
 //  dleft:  if the next phrase precedes the conditioning phrase and other stuff comes in between
 
-LexicalReorderingState* HierarchicalReorderingForwardState::Expand(const TranslationOption& topt, ScoreComponentCollection* scores) const
+LexicalReorderingState* HierarchicalReorderingForwardState::Expand(const TranslationOption& topt, const InputType& input,ScoreComponentCollection* scores) const
 {
   const LexicalReorderingConfiguration::ModelType modelType = m_configuration.GetModelType();
   const WordsRange currWordsRange = topt.GetSourceWordsRange();
@@ -454,7 +454,7 @@ LexicalReorderingState* HierarchicalReorderingForwardState::Expand(const Transla
       reoType = GetOrientationTypeLeftRight(currWordsRange, coverage);
     }
 
-    CopyScores(scores, topt, reoType);
+    CopyScores(scores, topt, input, reoType);
   }
 
   return new HierarchicalReorderingForwardState(this, topt);
diff --git a/moses/FF/LexicalReordering/LexicalReorderingState.h b/moses/FF/LexicalReordering/LexicalReorderingState.h
index cf91eaf69b..e309ed7f13 100644
--- a/moses/FF/LexicalReordering/LexicalReorderingState.h
+++ b/moses/FF/LexicalReordering/LexicalReorderingState.h
@@ -96,7 +96,7 @@ class LexicalReorderingState : public FFState
 {
 public:
   virtual int Compare(const FFState& o) const = 0;
-  virtual LexicalReorderingState* Expand(const TranslationOption& hypo, ScoreComponentCollection* scores) const = 0;
+  virtual LexicalReorderingState* Expand(const TranslationOption& hypo, const InputType& input, ScoreComponentCollection* scores) const = 0;
 
   static LexicalReorderingState* CreateLexicalReorderingState(const std::vector<std::string>& config,
       LexicalReorderingConfiguration::Direction dir, const InputType &input);
@@ -120,10 +120,11 @@ class LexicalReorderingState : public FFState
     : m_configuration(config), m_direction(dir), m_offset(offset), m_prevOption(NULL) {}
 
   // copy the right scores in the right places, taking into account forward/backward, offset, collapse
-  void CopyScores(ScoreComponentCollection* scores, const TranslationOption& topt, ReorderingType reoType) const;
+  void CopyScores(ScoreComponentCollection* scores, const TranslationOption& topt, const InputType& input, ReorderingType reoType) const;
   int ComparePrevScores(const TranslationOption *other) const;
 
   //constants for the different type of reorderings (corresponding to indexes in the table file)
+  public:
   static const ReorderingType M = 0;  // monotonic
   static const ReorderingType NM = 1; // non-monotonic
   static const ReorderingType S = 1;  // swap
@@ -132,7 +133,6 @@ class LexicalReorderingState : public FFState
   static const ReorderingType DR = 3; // discontinuous, right
   static const ReorderingType R = 0;  // right
   static const ReorderingType L = 1;  // left
-  public:
   static const ReorderingType MAX = 3; //largest possible
 };
 
@@ -152,7 +152,7 @@ class BidirectionalReorderingState : public LexicalReorderingState
   }
 
   virtual int Compare(const FFState& o) const;
-  virtual LexicalReorderingState* Expand(const TranslationOption& topt, ScoreComponentCollection*  scores) const;
+  virtual LexicalReorderingState* Expand(const TranslationOption& topt, const InputType& input, ScoreComponentCollection*  scores) const;
 };
 
 //! State for the standard Moses implementation of lexical reordering models
@@ -168,7 +168,7 @@ class PhraseBasedReorderingState : public LexicalReorderingState
   PhraseBasedReorderingState(const PhraseBasedReorderingState *prev, const TranslationOption &topt);
 
   virtual int Compare(const FFState& o) const;
-  virtual LexicalReorderingState* Expand(const TranslationOption& topt, ScoreComponentCollection*  scores) const;
+  virtual LexicalReorderingState* Expand(const TranslationOption& topt,const InputType& input, ScoreComponentCollection*  scores) const;
 
   ReorderingType GetOrientationTypeMSD(WordsRange currRange) const;
   ReorderingType GetOrientationTypeMSLR(WordsRange currRange) const;
@@ -189,7 +189,7 @@ class HierarchicalReorderingBackwardState : public LexicalReorderingState
                                       const TranslationOption &topt, ReorderingStack reoStack);
 
   virtual int Compare(const FFState& o) const;
-  virtual LexicalReorderingState* Expand(const TranslationOption& hypo, ScoreComponentCollection*  scores) const;
+  virtual LexicalReorderingState* Expand(const TranslationOption& hypo, const InputType& input,  ScoreComponentCollection*  scores) const;
 
 private:
   ReorderingType GetOrientationTypeMSD(int reoDistance) const;
@@ -212,7 +212,7 @@ class HierarchicalReorderingForwardState : public LexicalReorderingState
   HierarchicalReorderingForwardState(const HierarchicalReorderingForwardState *prev, const TranslationOption &topt);
 
   virtual int Compare(const FFState& o) const;
-  virtual LexicalReorderingState* Expand(const TranslationOption& hypo, ScoreComponentCollection* scores) const;
+  virtual LexicalReorderingState* Expand(const TranslationOption& hypo, const InputType& input, ScoreComponentCollection* scores) const;
 
 private:
   ReorderingType GetOrientationTypeMSD(WordsRange currRange, WordsBitmap coverage) const;
diff --git a/moses/FF/LexicalReordering/SparseReordering.cpp b/moses/FF/LexicalReordering/SparseReordering.cpp
index 426aad1830..21dbf0eb7f 100644
--- a/moses/FF/LexicalReordering/SparseReordering.cpp
+++ b/moses/FF/LexicalReordering/SparseReordering.cpp
@@ -122,11 +122,42 @@ void SparseReordering::AddFeatures(size_t id,
 }
 
 void SparseReordering::CopyScores(
-               const TranslationOption& topt,
+               const TranslationOption& currentOpt,
+               const TranslationOption* previousOpt,
+               const InputType& input,
                LexicalReorderingState::ReorderingType reoType,
                LexicalReorderingConfiguration::Direction direction,
                ScoreComponentCollection* scores) const 
 {
+  if (m_useBetween && direction == LexicalReorderingConfiguration::Backward &&
+      (reoType == LexicalReorderingState::D || reoType == LexicalReorderingState::DL ||
+        reoType == LexicalReorderingState::DR)) {
+    size_t gapStart, gapEnd;
+    const Sentence& sentence = dynamic_cast<const Sentence&>(input);
+    const WordsRange& currentRange = currentOpt.GetSourceWordsRange();
+    if (previousOpt) {
+      const WordsRange& previousRange = previousOpt->GetSourceWordsRange();
+      if (previousRange < currentRange) {
+        gapStart = previousRange.GetEndPos() + 1;
+        gapEnd = currentRange.GetStartPos();
+      } else {
+        gapStart = currentRange.GetEndPos() + 1;
+        gapEnd = previousRange.GetStartPos();
+      }
+    } else {
+      //start of sentence
+      gapStart = 0;
+      gapEnd  = currentRange.GetStartPos();
+    }
+    assert(gapStart < gapEnd);
+    for (size_t i = gapStart; i < gapEnd; ++i) {
+      for (size_t j = 0; j < m_sourceWordLists.size(); ++j) {
+        AddFeatures(j, SparseReorderingFeatureKey::Between,
+           SparseReorderingFeatureKey::Source, sentence.GetWord(i),
+          SparseReorderingFeatureKey::First, m_sourceWordLists[j], reoType, scores);
+      }
+    }
+  }
   //std::cerr << "SR " << topt << " " << reoType << " " << direction << std::endl;
   //phrase (backward)
   //stack (forward)
@@ -144,14 +175,14 @@ void SparseReordering::CopyScores(
     assert(!"Shouldn't call CopyScores() with bidirectional direction");
   }
   for (size_t i = 0; i < m_sourceWordLists.size(); ++i) {
-    const Phrase& sourcePhrase = topt.GetInputPath().GetPhrase();
+    const Phrase& sourcePhrase = currentOpt.GetInputPath().GetPhrase();
     AddFeatures(i, type, SparseReorderingFeatureKey::Source, sourcePhrase.GetWord(0),
       SparseReorderingFeatureKey::First, m_sourceWordLists[i], reoType, scores);
     AddFeatures(i, type, SparseReorderingFeatureKey::Source, sourcePhrase.GetWord(sourcePhrase.GetSize()-1),
       SparseReorderingFeatureKey::Last, m_sourceWordLists[i], reoType, scores);
   }
   for (size_t i = 0; i < m_targetWordLists.size(); ++i) {
-    const Phrase& targetPhrase = topt.GetTargetPhrase();   
+    const Phrase& targetPhrase = currentOpt.GetTargetPhrase();   
     AddFeatures(i, type, SparseReorderingFeatureKey::Target, targetPhrase.GetWord(0),
       SparseReorderingFeatureKey::First, m_targetWordLists[i], reoType, scores);
     AddFeatures(i, type, SparseReorderingFeatureKey::Target, targetPhrase.GetWord(targetPhrase.GetSize()-1),
diff --git a/moses/FF/LexicalReordering/SparseReordering.h b/moses/FF/LexicalReordering/SparseReordering.h
index 76c72a2019..04d9e8eb79 100644
--- a/moses/FF/LexicalReordering/SparseReordering.h
+++ b/moses/FF/LexicalReordering/SparseReordering.h
@@ -86,8 +86,10 @@ class SparseReordering
 public:
   SparseReordering(const std::map<std::string,std::string>& config, const LexicalReordering* producer);
   
-  //If direction is backward topt is the current option, otherwise the previous
-  void CopyScores(const TranslationOption& topt,
+  //If direction is backward the options will be different, for forward they will be the same
+  void CopyScores(const TranslationOption& currentOpt,
+                  const TranslationOption* previousOpt,
+                  const InputType& input,
                  LexicalReorderingState::ReorderingType reoType,
                  LexicalReorderingConfiguration::Direction direction,
                  ScoreComponentCollection* scores) const ;

From 961d72269aa6c4a4389761d9d8ad93af3530cf92 Mon Sep 17 00:00:00 2001
From: Barry Haddow <barry.haddow@gmail.com>
Date: Tue, 24 Jun 2014 21:50:20 +0100
Subject: [PATCH 27/84] clusters for sparse reordering feature

---
 .../FF/LexicalReordering/SparseReordering.cpp | 132 ++++++++++++------
 moses/FF/LexicalReordering/SparseReordering.h |  19 ++-
 2 files changed, 107 insertions(+), 44 deletions(-)

diff --git a/moses/FF/LexicalReordering/SparseReordering.cpp b/moses/FF/LexicalReordering/SparseReordering.cpp
index 21dbf0eb7f..3d8f56d16a 100644
--- a/moses/FF/LexicalReordering/SparseReordering.cpp
+++ b/moses/FF/LexicalReordering/SparseReordering.cpp
@@ -3,8 +3,13 @@
 #include "moses/FactorCollection.h"
 #include "moses/InputPath.h"
 #include "moses/Util.h"
+
 #include "util/exception.hh"
 
+#include "util/file_piece.hh"
+#include "util/string_piece.hh"
+#include "util/tokenize_piece.hh"
+
 #include "LexicalReordering.h"
 #include "SparseReordering.h"
 
@@ -41,6 +46,7 @@ const std::string& SparseReorderingFeatureKey::Name(const string& wordListId) {
   buf << kSep;
   buf << wordListId;
   buf << kSep;
+  if (isCluster) buf << "cluster_";
   buf << word->GetString();
   buf << kSep;
   buf << reoType;
@@ -65,7 +71,15 @@ SparseReordering::SparseReordering(const map<string,string>& config, const Lexic
         UTIL_THROW(util::Exception, "Sparse reordering requires source or target, not " << fields[1]);
       }
     } else if (fields[0] == "clusters") {
-      UTIL_THROW(util::Exception, "Sparse reordering does not yet support clusters" << i->first);
+      UTIL_THROW_IF(!(fields.size() == 3), util::Exception, "Sparse reordering cluster name should be sparse-clusters-(source|target)-<id>");
+      if (fields[1] == kSource) {
+        ReadClusterMap(i->second,fields[2], SparseReorderingFeatureKey::Source, &m_sourceClusterMaps);
+      } else if (fields[1] == kTarget) {
+        ReadClusterMap(i->second,fields[2],SparseReorderingFeatureKey::Target, &m_targetClusterMaps);
+      } else {
+        UTIL_THROW(util::Exception, "Sparse reordering requires source or target, not " << fields[1]);
+      }
+
     } else if (fields[0] == "phrase") {
       m_usePhrase = true;
     } else if (fields[0] == "stack") {
@@ -79,6 +93,21 @@ SparseReordering::SparseReordering(const map<string,string>& config, const Lexic
 
 }
 
+void SparseReordering::PreCalculateFeatureNames(size_t index, const string& id, SparseReorderingFeatureKey::Side side, const Factor* factor, bool isCluster) { // stores in m_featureMap the name of every key that can be built for this factor
+  for (size_t type = SparseReorderingFeatureKey::Stack; // every Type value, Stack through Between
+                     type <= SparseReorderingFeatureKey::Between; ++type) {
+    for (size_t position = SparseReorderingFeatureKey::First; // both Position values
+                     position <= SparseReorderingFeatureKey::Last; ++position) {
+      for (int reoType = 0; reoType <= LexicalReorderingState::MAX; ++reoType) { // 0..MAX inclusive
+        SparseReorderingFeatureKey key(
+          index, static_cast<SparseReorderingFeatureKey::Type>(type), factor, isCluster,
+           static_cast<SparseReorderingFeatureKey::Position>(position), side, reoType);
+        m_featureMap[key] = key.Name(id); // AddFeatures() later looks names up by key instead of rebuilding them
+      }
+    }
+  }
+}
+
 void SparseReordering::ReadWordList(const string& filename, const string& id, SparseReorderingFeatureKey::Side side, vector<WordList>* pWordLists) {
   ifstream fh(filename.c_str());
   UTIL_THROW_IF(!fh, util::Exception, "Unable to open: " << filename);
@@ -89,35 +118,68 @@ void SparseReordering::ReadWordList(const string& filename, const string& id, Sp
     //TODO: StringPiece
     const Factor* factor = FactorCollection::Instance().AddFactor(line);
     pWordLists->back().second.insert(factor);
-    //Pre-calculate feature names.
-    for (size_t type = SparseReorderingFeatureKey::Stack;
-                       type <= SparseReorderingFeatureKey::Between; ++type) {
-      for (size_t position = SparseReorderingFeatureKey::First;
-                       position <= SparseReorderingFeatureKey::Last; ++position) {
-        for (int reoType = 0; reoType <= LexicalReorderingState::MAX; ++reoType) {
-          SparseReorderingFeatureKey key(
-            pWordLists->size()-1, static_cast<SparseReorderingFeatureKey::Type>(type),
-            factor, static_cast<SparseReorderingFeatureKey::Position>(position), side, reoType);
-          m_featureMap[key] = key.Name(id);
-        }
-      }
-    }
+    PreCalculateFeatureNames(pWordLists->size()-1, id, side, factor, false); 
 
   }
 }
 
-void SparseReordering::AddFeatures(size_t id,
+void SparseReordering::ReadClusterMap(const string& filename, const string& id, SparseReorderingFeatureKey::Side side, vector<ClusterMap>* pClusterMaps) { // loads a tab-separated word/cluster file into a new ClusterMap appended to *pClusterMaps
+  pClusterMaps->push_back(ClusterMap()); // new, empty map for this file
+  pClusterMaps->back().first = id; // remember the map's id
+  util::FilePiece file(filename.c_str());
+  StringPiece line;
+  while (true) { // one iteration per input line; left via the break below
+    try {
+      line = file.ReadLine();
+    } catch (const util::EndOfFileException &e) { // end of file is reported as an exception
+      break;
+    }
+    util::TokenIter<util::SingleCharacter, true> lineIter(line,util::SingleCharacter('\t'));
+    const Factor* wordFactor = FactorCollection::Instance().AddFactor(*lineIter);
+    ++lineIter;
+    const Factor* idFactor = FactorCollection::Instance().AddFactor(*lineIter);
+    pClusterMaps->back().second[wordFactor] = idFactor;
+    PreCalculateFeatureNames(pClusterMaps->size()-1, id, side, idFactor, true); 
+  } // end of per-line loop
+}
+
+void SparseReordering::AddFeatures( // adds 1.0 to the stored feature of each word list / cluster map on `side` that covers `word`
     SparseReorderingFeatureKey::Type type, SparseReorderingFeatureKey::Side side,
     const Word& word, SparseReorderingFeatureKey::Position position,
-    const WordList& words, LexicalReorderingState::ReorderingType reoType,
+    LexicalReorderingState::ReorderingType reoType,
     ScoreComponentCollection* scores) const {
 
   const Factor*  wordFactor = word.GetFactor(0);
-  if (words.second.find(wordFactor) == words.second.end()) return;
-  SparseReorderingFeatureKey key(id, type, wordFactor, position, side, reoType);
-  FeatureMap::const_iterator fmi = m_featureMap.find(key);
-  assert(fmi != m_featureMap.end());
-  scores->PlusEquals(m_producer, fmi->second, 1.0);
+  // Select the word lists and cluster maps that belong to the requested side.
+  const vector<WordList>* wordLists;
+  const vector<ClusterMap>* clusterMaps;
+  if (side == SparseReorderingFeatureKey::Source) {
+    wordLists = &m_sourceWordLists;
+    clusterMaps = &m_sourceClusterMaps;
+  } else {
+    wordLists = &m_targetWordLists;
+    clusterMaps = &m_targetClusterMaps;
+  }
+  // Word-list features: one increment per list that contains the word factor itself.
+  for (size_t id = 0; id < wordLists->size(); ++id) {
+    if ((*wordLists)[id].second.find(wordFactor) == (*wordLists)[id].second.end()) continue;
+    SparseReorderingFeatureKey key(id, type, wordFactor, false, position, side, reoType);
+    FeatureMap::const_iterator fmi = m_featureMap.find(key);
+    assert(fmi != m_featureMap.end()); // names are filled in by PreCalculateFeatureNames(); NOTE(review): only checked when assert is enabled
+    scores->PlusEquals(m_producer, fmi->second, 1.0);
+  }
+  // Cluster features: one increment per cluster map that has an entry for the word factor.
+  for (size_t id = 0; id < clusterMaps->size(); ++id) {
+    const ClusterMap& clusterMap = (*clusterMaps)[id];
+    boost::unordered_map<const Factor*, const Factor*>::const_iterator clusterIter
+      = clusterMap.second.find(wordFactor);
+    if (clusterIter != clusterMap.second.end()) {
+      SparseReorderingFeatureKey key(id, type, clusterIter->second, true, position, side, reoType); // keyed on the cluster factor, not the word
+      FeatureMap::const_iterator fmi = m_featureMap.find(key);
+      assert(fmi != m_featureMap.end()); // NOTE(review): only checked when assert is enabled
+      scores->PlusEquals(m_producer, fmi->second, 1.0);
+    }
+  }
 
 }
 
@@ -151,11 +213,9 @@ void SparseReordering::CopyScores(
     }
     assert(gapStart < gapEnd);
     for (size_t i = gapStart; i < gapEnd; ++i) {
-      for (size_t j = 0; j < m_sourceWordLists.size(); ++j) {
-        AddFeatures(j, SparseReorderingFeatureKey::Between,
+        AddFeatures(SparseReorderingFeatureKey::Between,
            SparseReorderingFeatureKey::Source, sentence.GetWord(i),
-          SparseReorderingFeatureKey::First, m_sourceWordLists[j], reoType, scores);
-      }
+          SparseReorderingFeatureKey::First, reoType, scores);
     }
   }
   //std::cerr << "SR " << topt << " " << reoType << " " << direction << std::endl;
@@ -174,20 +234,14 @@ void SparseReordering::CopyScores(
     type = SparseReorderingFeatureKey::Phrase;
     assert(!"Shouldn't call CopyScores() with bidirectional direction");
   }
-  for (size_t i = 0; i < m_sourceWordLists.size(); ++i) {
-    const Phrase& sourcePhrase = currentOpt.GetInputPath().GetPhrase();
-    AddFeatures(i, type, SparseReorderingFeatureKey::Source, sourcePhrase.GetWord(0),
-      SparseReorderingFeatureKey::First, m_sourceWordLists[i], reoType, scores);
-    AddFeatures(i, type, SparseReorderingFeatureKey::Source, sourcePhrase.GetWord(sourcePhrase.GetSize()-1),
-      SparseReorderingFeatureKey::Last, m_sourceWordLists[i], reoType, scores);
-  }
-  for (size_t i = 0; i < m_targetWordLists.size(); ++i) {
-    const Phrase& targetPhrase = currentOpt.GetTargetPhrase();   
-    AddFeatures(i, type, SparseReorderingFeatureKey::Target, targetPhrase.GetWord(0),
-      SparseReorderingFeatureKey::First, m_targetWordLists[i], reoType, scores);
-    AddFeatures(i, type, SparseReorderingFeatureKey::Target, targetPhrase.GetWord(targetPhrase.GetSize()-1),
-      SparseReorderingFeatureKey::Last, m_targetWordLists[i], reoType, scores);
-  }
+  const Phrase& sourcePhrase = currentOpt.GetInputPath().GetPhrase();
+  AddFeatures(type, SparseReorderingFeatureKey::Source, sourcePhrase.GetWord(0),
+    SparseReorderingFeatureKey::First, reoType, scores);
+  AddFeatures(type, SparseReorderingFeatureKey::Source, sourcePhrase.GetWord(sourcePhrase.GetSize()-1), SparseReorderingFeatureKey::Last, reoType, scores);
+  const Phrase& targetPhrase = currentOpt.GetTargetPhrase();   
+  AddFeatures(type, SparseReorderingFeatureKey::Target, targetPhrase.GetWord(0),
+    SparseReorderingFeatureKey::First, reoType, scores);
+  AddFeatures(type, SparseReorderingFeatureKey::Target, targetPhrase.GetWord(targetPhrase.GetSize()-1), SparseReorderingFeatureKey::Last, reoType, scores);
 
 
 }
diff --git a/moses/FF/LexicalReordering/SparseReordering.h b/moses/FF/LexicalReordering/SparseReordering.h
index 04d9e8eb79..e496daf942 100644
--- a/moses/FF/LexicalReordering/SparseReordering.h
+++ b/moses/FF/LexicalReordering/SparseReordering.h
@@ -44,13 +44,15 @@ struct SparseReorderingFeatureKey {
   size_t id;
   enum Type {Stack, Phrase, Between} type;
   const Factor* word;
+  bool isCluster;
   enum Position {First, Last} position;
   enum Side {Source, Target} side;
   LexicalReorderingState::ReorderingType reoType;
 
-  SparseReorderingFeatureKey(size_t id_, Type type_, const Factor* word_, Position position_, 
-        Side side_, LexicalReorderingState::ReorderingType reoType_) 
-    : id(id_), type(type_), word(word_), position(position_), side(side_), reoType(reoType_)     
+  SparseReorderingFeatureKey(size_t id_, Type type_, const Factor* word_, bool isCluster_,
+   Position position_, Side side_, LexicalReorderingState::ReorderingType reoType_) 
+    : id(id_), type(type_), word(word_), isCluster(isCluster_),
+       position(position_), side(side_), reoType(reoType_)     
   {}
 
   const std::string& Name(const std::string& wordListId) ; 
@@ -64,6 +66,7 @@ struct HashSparseReorderingFeatureKey : public std::unary_function<SparseReorder
     seed = util::MurmurHashNative(&key.id, sizeof(key.id), seed);
     seed = util::MurmurHashNative(&key.type, sizeof(key.type), seed);
     seed = util::MurmurHashNative(&key.word, sizeof(key.word), seed);
+    seed = util::MurmurHashNative(&key.isCluster, sizeof(key.isCluster), seed);
     seed = util::MurmurHashNative(&key.position, sizeof(key.position), seed);
     seed = util::MurmurHashNative(&key.side, sizeof(key.side), seed);
     seed = util::MurmurHashNative(&key.reoType, sizeof(key.reoType), seed);
@@ -99,6 +102,9 @@ class SparseReordering
   typedef std::pair<std::string, boost::unordered_set<const Factor*> > WordList; //id and list
   std::vector<WordList> m_sourceWordLists;
   std::vector<WordList> m_targetWordLists;
+  typedef std::pair<std::string, boost::unordered_map<const Factor*, const Factor*> > ClusterMap; //id and map
+  std::vector<ClusterMap> m_sourceClusterMaps;
+  std::vector<ClusterMap> m_targetClusterMaps;
   bool m_usePhrase;
   bool m_useBetween;
   bool m_useStack;
@@ -107,10 +113,13 @@ class SparseReordering
 
   void ReadWordList(const std::string& filename, const std::string& id,
        SparseReorderingFeatureKey::Side side, std::vector<WordList>* pWordLists);
-  void AddFeatures(size_t id,
+  void ReadClusterMap(const std::string& filename, const std::string& id, SparseReorderingFeatureKey::Side side, std::vector<ClusterMap>* pClusterMaps);
+  void PreCalculateFeatureNames(size_t index, const std::string& id, SparseReorderingFeatureKey::Side side, const Factor* factor, bool isCluster);
+
+  void AddFeatures(
     SparseReorderingFeatureKey::Type type, SparseReorderingFeatureKey::Side side,
      const Word& word, SparseReorderingFeatureKey::Position position,
-     const WordList& words, LexicalReorderingState::ReorderingType reoType,
+     LexicalReorderingState::ReorderingType reoType,
     ScoreComponentCollection* scores) const;
 
 };

From 556e1123660eb551de74418e89245a008f1fe0f5 Mon Sep 17 00:00:00 2001
From: Ulrich Germann <ugermann@inf.ed.ac.uk>
Date: Mon, 30 Jun 2014 00:32:11 +0100
Subject: [PATCH 28/84] Major bug fix in Mmsapt.combine_pstats.

---
 moses/TranslationModel/UG/mmsapt.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/moses/TranslationModel/UG/mmsapt.cpp b/moses/TranslationModel/UG/mmsapt.cpp
index 65a7a06adf..dc99454728 100644
--- a/moses/TranslationModel/UG/mmsapt.cpp
+++ b/moses/TranslationModel/UG/mmsapt.cpp
@@ -576,9 +576,9 @@ namespace Moses
 	    else pool.update(a->first,a->second);
 	    BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_dyn)
 	      (*ff)(btb,pool,&ppfix.fvals);
+	    if (ppfix.p2)
+	      tpcoll->Add(createTargetPhrase(src,bta,ppfix));
 	  }
-	if (ppfix.p2)
-	  tpcoll->Add(createTargetPhrase(src,bta,ppfix));
       }
     return (statsa || statsb);
   }

From c4ca243b7aa265c4e2da341671eba6d90c20509d Mon Sep 17 00:00:00 2001
From: Barry Haddow <barry.haddow@gmail.com>
Date: Mon, 30 Jun 2014 12:13:33 +0100
Subject: [PATCH 29/84] Improved debug for sparse reordering

---
 moses/FF/LexicalReordering/LexicalReordering.cpp | 8 ++++----
 moses/FF/LexicalReordering/SparseReordering.cpp  | 2 ++
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/moses/FF/LexicalReordering/LexicalReordering.cpp b/moses/FF/LexicalReordering/LexicalReordering.cpp
index b0b18c65fa..d692336c9b 100644
--- a/moses/FF/LexicalReordering/LexicalReordering.cpp
+++ b/moses/FF/LexicalReordering/LexicalReordering.cpp
@@ -31,7 +31,7 @@ LexicalReordering::LexicalReordering(const std::string &line)
     } else if (args[0].substr(0,7) == "sparse-") {
       sparseArgs[args[0].substr(7)] = args[1];
     } else {
-      throw "Unknown argument " + args[0];
+      UTIL_THROW(util::Exception,"Unknown argument " + args[0]);
     }
   }
 
@@ -39,17 +39,17 @@ LexicalReordering::LexicalReordering(const std::string &line)
   case LexicalReorderingConfiguration::FE:
   case LexicalReorderingConfiguration::E:
     if(m_factorsE.empty()) {
-      throw "TL factor mask for lexical reordering is unexpectedly empty";
+      UTIL_THROW(util::Exception,"TL factor mask for lexical reordering is unexpectedly empty");
     }
     if(m_configuration->GetCondition() == LexicalReorderingConfiguration::E)
       break; // else fall through
   case LexicalReorderingConfiguration::F:
     if(m_factorsF.empty()) {
-      throw "SL factor mask for lexical reordering is unexpectedly empty";
+      UTIL_THROW(util::Exception,"SL factor mask for lexical reordering is unexpectedly empty");
     }
     break;
   default:
-    throw "Unknown conditioning option!";
+    UTIL_THROW(util::Exception,"Unknown conditioning option!");
   }
 
   m_configuration->ConfigureSparse(sparseArgs, this);
diff --git a/moses/FF/LexicalReordering/SparseReordering.cpp b/moses/FF/LexicalReordering/SparseReordering.cpp
index 3d8f56d16a..bc519eefcb 100644
--- a/moses/FF/LexicalReordering/SparseReordering.cpp
+++ b/moses/FF/LexicalReordering/SparseReordering.cpp
@@ -135,8 +135,10 @@ void SparseReordering::ReadClusterMap(const string& filename, const string& id,
       break;
     }
     util::TokenIter<util::SingleCharacter, true> lineIter(line,util::SingleCharacter('\t'));
+    if (!lineIter) UTIL_THROW(util::Exception, "Malformed cluster line (missing word): '" << line << "'");
     const Factor* wordFactor = FactorCollection::Instance().AddFactor(*lineIter);
     ++lineIter;
+    if (!lineIter) UTIL_THROW(util::Exception, "Malformed cluster line (missing cluster id): '" << line << "'");
     const Factor* idFactor = FactorCollection::Instance().AddFactor(*lineIter);
     pClusterMaps->back().second[wordFactor] = idFactor;
     PreCalculateFeatureNames(pClusterMaps->size()-1, id, side, idFactor, true); 

From 17140e4ae70c201b017cb442c013c2003fd1969d Mon Sep 17 00:00:00 2001
From: Hieu Hoang <hieuhoang@gmail.com>
Date: Tue, 1 Jul 2014 13:01:51 -0400
Subject: [PATCH 30/84] eclipse

---
 contrib/other-builds/moses/.cproject |  5 +++-
 contrib/other-builds/moses/.project  | 40 +++++++++++++++++++++-------
 2 files changed, 34 insertions(+), 11 deletions(-)

diff --git a/contrib/other-builds/moses/.cproject b/contrib/other-builds/moses/.cproject
index 0d6abbb4f8..409adfcc57 100644
--- a/contrib/other-builds/moses/.cproject
+++ b/contrib/other-builds/moses/.cproject
@@ -80,8 +80,11 @@
 					<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.511477442" name="Rand.h" rcbsApplicability="disable" resourcePath="LM/Rand.h" toolsToInvoke=""/>
 					<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.1211280539" name="DALMWrapper.h" rcbsApplicability="disable" resourcePath="LM/DALMWrapper.h" toolsToInvoke=""/>
 					<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.790052015" name="IRST.h" rcbsApplicability="disable" resourcePath="LM/IRST.h" toolsToInvoke=""/>
+					<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.887148589" name="FuzzyMatchWrapper.cpp" rcbsApplicability="disable" resourcePath="TranslationModel/fuzzy-match/FuzzyMatchWrapper.cpp" toolsToInvoke="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1774992327.1298504775">
+						<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1774992327.1298504775" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1774992327"/>
+					</fileInfo>
 					<sourceEntries>
-						<entry excluding="TranslationModel/fuzzy-match/FuzzyMatchWrapper.cpp|TranslationModel/CompactPT|LM/NeuralLMWrapper.cpp|FF/PhraseLengthFeatureTest.cpp|PhraseLengthFeatureTest.cpp|LM/BackwardTest.cpp|LM/BackwardLMState.h|LM/BackwardLMState.cpp|LM/Backward.h|LM/Backward.cpp|FeatureVectorTest.cpp|LM/ParallelBackoff.h|LM/ParallelBackoff.cpp|src/SyntacticLanguageModelState.h|src/SyntacticLanguageModelFiles.h|src/SyntacticLanguageModel.h|src/SyntacticLanguageModel.cpp|src/LM/SRI.h|src/LM/SRI.cpp|src/LM/Rand.h|src/LM/Rand.cpp|src/LM/LDHT.h|src/LM/LDHT.cpp|SyntacticLanguageModelState.h|SyntacticLanguageModelFiles.h|SyntacticLanguageModel.h|SyntacticLanguageModel.cpp|LM/LDHT.h|LM/LDHT.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
+						<entry excluding="TranslationModel/CompactPT|LM/NeuralLMWrapper.cpp|FF/PhraseLengthFeatureTest.cpp|PhraseLengthFeatureTest.cpp|LM/BackwardTest.cpp|LM/BackwardLMState.h|LM/BackwardLMState.cpp|LM/Backward.h|LM/Backward.cpp|FeatureVectorTest.cpp|LM/ParallelBackoff.h|LM/ParallelBackoff.cpp|src/SyntacticLanguageModelState.h|src/SyntacticLanguageModelFiles.h|src/SyntacticLanguageModel.h|src/SyntacticLanguageModel.cpp|src/LM/SRI.h|src/LM/SRI.cpp|src/LM/Rand.h|src/LM/Rand.cpp|src/LM/LDHT.h|src/LM/LDHT.cpp|SyntacticLanguageModelState.h|SyntacticLanguageModelFiles.h|SyntacticLanguageModel.h|SyntacticLanguageModel.cpp|LM/LDHT.h|LM/LDHT.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
 					</sourceEntries>
 				</configuration>
 			</storageModule>
diff --git a/contrib/other-builds/moses/.project b/contrib/other-builds/moses/.project
index ff35ca5baa..1c22fca311 100644
--- a/contrib/other-builds/moses/.project
+++ b/contrib/other-builds/moses/.project
@@ -601,16 +601,6 @@
 			<type>1</type>
 			<locationURI>PARENT-3-PROJECT_LOC/moses/ReorderingConstraint.h</locationURI>
 		</link>
-		<link>
-			<name>ReorderingStack.cpp</name>
-			<type>1</type>
-			<locationURI>PARENT-3-PROJECT_LOC/moses/ReorderingStack.cpp</locationURI>
-		</link>
-		<link>
-			<name>ReorderingStack.h</name>
-			<type>1</type>
-			<locationURI>PARENT-3-PROJECT_LOC/moses/ReorderingStack.h</locationURI>
-		</link>
 		<link>
 			<name>RuleCube.cpp</name>
 			<type>1</type>
@@ -1331,6 +1321,16 @@
 			<type>1</type>
 			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/SpanLength.h</locationURI>
 		</link>
+		<link>
+			<name>FF/SparseHieroReorderingFeature.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/SparseHieroReorderingFeature.cpp</locationURI>
+		</link>
+		<link>
+			<name>FF/SparseHieroReorderingFeature.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/SparseHieroReorderingFeature.h</locationURI>
+		</link>
 		<link>
 			<name>FF/StatefulFeatureFunction.cpp</name>
 			<type>1</type>
@@ -1916,6 +1916,26 @@
 			<type>1</type>
 			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/LexicalReordering/LexicalReorderingTable.h</locationURI>
 		</link>
+		<link>
+			<name>FF/LexicalReordering/ReorderingStack.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/LexicalReordering/ReorderingStack.cpp</locationURI>
+		</link>
+		<link>
+			<name>FF/LexicalReordering/ReorderingStack.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/LexicalReordering/ReorderingStack.h</locationURI>
+		</link>
+		<link>
+			<name>FF/LexicalReordering/SparseReordering.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/LexicalReordering/SparseReordering.cpp</locationURI>
+		</link>
+		<link>
+			<name>FF/LexicalReordering/SparseReordering.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/LexicalReordering/SparseReordering.h</locationURI>
+		</link>
 		<link>
 			<name>FF/OSM-Feature/OpSequenceModel.cpp</name>
 			<type>1</type>

From bdf8d1a405936e9cfaf3fd295fa7c7f57e5362bb Mon Sep 17 00:00:00 2001
From: Rico Sennrich <rico.sennrich@gmx.ch>
Date: Fri, 4 Jul 2014 10:28:24 +0100
Subject: [PATCH 31/84] CreateFromString no longer requires factorDelimiter

---
 moses/TranslationModel/PhraseDictionaryMultiModel.cpp       | 5 +----
 moses/TranslationModel/PhraseDictionaryMultiModelCounts.cpp | 5 +----
 2 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/moses/TranslationModel/PhraseDictionaryMultiModel.cpp b/moses/TranslationModel/PhraseDictionaryMultiModel.cpp
index 9f39965052..a1824b4751 100644
--- a/moses/TranslationModel/PhraseDictionaryMultiModel.cpp
+++ b/moses/TranslationModel/PhraseDictionaryMultiModel.cpp
@@ -323,9 +323,6 @@ void PhraseDictionaryMultiModel::SetTemporaryMultiModelWeightsVector(std::vector
 vector<float> PhraseDictionaryMultiModel::MinimizePerplexity(vector<pair<string, string> > &phrase_pair_vector)
 {
 
-  const StaticData &staticData = StaticData::Instance();
-  const string& factorDelimiter = staticData.GetFactorDelimiter();
-
   map<pair<string, string>, size_t> phrase_pair_map;
 
   for ( vector<pair<string, string> >::const_iterator iter = phrase_pair_vector.begin(); iter != phrase_pair_vector.end(); ++iter ) {
@@ -344,7 +341,7 @@ vector<float> PhraseDictionaryMultiModel::MinimizePerplexity(vector<pair<string,
     map<string,multiModelStatistics*>* allStats = new(map<string,multiModelStatistics*>);
 
     Phrase sourcePhrase(0);
-    sourcePhrase.CreateFromString(Input, m_input, source_string, factorDelimiter, NULL);
+    sourcePhrase.CreateFromString(Input, m_input, source_string, NULL);
 
     CollectSufficientStatistics(sourcePhrase, allStats); //optimization potential: only call this once per source phrase
 
diff --git a/moses/TranslationModel/PhraseDictionaryMultiModelCounts.cpp b/moses/TranslationModel/PhraseDictionaryMultiModelCounts.cpp
index 99d3ad2567..83aa4a7186 100644
--- a/moses/TranslationModel/PhraseDictionaryMultiModelCounts.cpp
+++ b/moses/TranslationModel/PhraseDictionaryMultiModelCounts.cpp
@@ -489,9 +489,6 @@ void PhraseDictionaryMultiModelCounts::LoadLexicalTable( string &fileName, lexic
 vector<float> PhraseDictionaryMultiModelCounts::MinimizePerplexity(vector<pair<string, string> > &phrase_pair_vector)
 {
 
-  const StaticData &staticData = StaticData::Instance();
-  const string& factorDelimiter = staticData.GetFactorDelimiter();
-
   map<pair<string, string>, size_t> phrase_pair_map;
 
   for ( vector<pair<string, string> >::const_iterator iter = phrase_pair_vector.begin(); iter != phrase_pair_vector.end(); ++iter ) {
@@ -510,7 +507,7 @@ vector<float> PhraseDictionaryMultiModelCounts::MinimizePerplexity(vector<pair<s
     map<string,multiModelCountsStatistics*>* allStats = new(map<string,multiModelCountsStatistics*>);
 
     Phrase sourcePhrase(0);
-    sourcePhrase.CreateFromString(Input, m_input, source_string, factorDelimiter, NULL);
+    sourcePhrase.CreateFromString(Input, m_input, source_string, NULL);
 
     CollectSufficientStatistics(sourcePhrase, fs, allStats); //optimization potential: only call this once per source phrase
 

From e1c9405b23e9ab9bc6821aa7ac4068748993baa6 Mon Sep 17 00:00:00 2001
From: Ulrich Germann <ugermann@inf.ed.ac.uk>
Date: Wed, 9 Jul 2014 02:39:28 +0100
Subject: [PATCH 32/84] Utilities to check gain in phrase coverage by dynamic
 augmentation of the phrase table in a post-editing scenario.

---
 .../TranslationModel/UG/spe-check-coverage.cc | 214 ++++++++++++++++++
 .../UG/spe-check-coverage2.cc                 |  76 +++++++
 2 files changed, 290 insertions(+)
 create mode 100644 moses/TranslationModel/UG/spe-check-coverage.cc
 create mode 100644 moses/TranslationModel/UG/spe-check-coverage2.cc

diff --git a/moses/TranslationModel/UG/spe-check-coverage.cc b/moses/TranslationModel/UG/spe-check-coverage.cc
new file mode 100644
index 0000000000..039b4cd371
--- /dev/null
+++ b/moses/TranslationModel/UG/spe-check-coverage.cc
@@ -0,0 +1,214 @@
+#include "mmsapt.h"
+#include "moses/TranslationModel/PhraseDictionaryTreeAdaptor.h"
+#include "moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.h"
+#include <boost/foreach.hpp>
+#include <boost/format.hpp>
+#include <boost/tokenizer.hpp>
+#include <boost/shared_ptr.hpp>
+#include <algorithm>
+#include <iostream>
+
+using namespace Moses;
+using namespace bitext;
+using namespace std;
+using namespace boost;
+
+vector<FactorType> fo(1,FactorType(0)); // one-element factor order (factor 0); NOTE(review): not referenced anywhere else in this file
+
+// Moses::Phrase that can be filled from a whitespace-separated string.
+class SimplePhrase : public Moses::Phrase
+{
+  vector<FactorType> const m_fo; // factor order: one entry, factor 0
+public:
+  SimplePhrase(): m_fo(1,FactorType(0)) {}
+  // Appends one word to this phrase for every whitespace-separated token of s.
+  void init(string const& s) 
+  {
+    istringstream buf(s); string w;
+    while (buf >> w) 
+      {
+	this->AddWord().CreateFromString(Input,m_fo,StringPiece(w),false,false);
+      }
+  }
+};
+
+class TargetPhraseIndexSorter // orders indices into a TargetPhraseCollection by descending weighted score
+{
+  TargetPhraseCollection const& my_tpc; // collection the compared indices refer to
+  CompareTargetPhrase cmp; // not used by operator(); the cmp-based comparison is disabled
+public:
+  TargetPhraseIndexSorter(TargetPhraseCollection const& tpc) : my_tpc(tpc) {}
+  bool operator()(size_t a, size_t b) const
+  {
+    ScoreComponentCollection const& lhs = my_tpc[a]->GetScoreBreakdown();
+    ScoreComponentCollection const& rhs = my_tpc[b]->GetScoreBreakdown();
+    return rhs.GetWeightedScore() < lhs.GetWeightedScore(); // true when entry a scores higher
+  }
+};
+
+int main(int argc, char* argv[])
+{
+  // For each (source, target, alignment) line triple: list matching phrase table entries, then add the triple to the Mmsapt.
+  string vlevel = "alt"; // verbosity level
+  vector<pair<string,int> > argfilter(5); // NOTE(review): 5 slots but only 4 are filled below -- confirm the empty 5th is intended
+  argfilter[0] = std::make_pair(string("--spe-src"),1);
+  argfilter[1] = std::make_pair(string("--spe-trg"),1);
+  argfilter[2] = std::make_pair(string("--spe-aln"),1);
+  argfilter[3] = std::make_pair(string("--spe-show"),1);
+  
+  char** my_args; int my_acnt;
+  char** mo_args; int mo_acnt;
+  filter_arguments(argc, argv, mo_acnt, &mo_args, my_acnt, &my_args, argfilter);
+
+  ifstream spe_src,spe_trg,spe_aln;
+  // open the three --spe-* input files and pick up the --spe-show level
+  for (int i = 0; i < my_acnt; i += 2)
+    {
+      if (!strcmp(my_args[i],"--spe-src"))
+	spe_src.open(my_args[i+1]);
+      else if (!strcmp(my_args[i],"--spe-trg"))
+	spe_trg.open(my_args[i+1]);
+      else if (!strcmp(my_args[i],"--spe-aln"))
+	spe_aln.open(my_args[i+1]);
+      else if (!strcmp(my_args[i],"--spe-show"))
+	vlevel = my_args[i+1];
+    }
+  
+  Parameter params;
+  if (!params.LoadParam(mo_acnt,mo_args) || 
+      !StaticData::LoadDataStatic(&params, mo_args[0]))
+    exit(1);
+
+  StaticData const& global = StaticData::Instance();
+  global.SetVerboseLevel(0);
+  vector<FactorType> ifo = global.GetInputFactorOrder();
+
+  PhraseDictionary* PT = PhraseDictionary::GetColl()[0];
+  Mmsapt* mmsapt = dynamic_cast<Mmsapt*>(PT);
+  if (!mmsapt)
+    {
+      cerr << "Phrase table implementation not supported by this utility." << endl;
+      exit(1);
+    }
+  mmsapt->SetTableLimit(0);
+  
+  string srcline,trgline,alnline;
+  cout.precision(2);
+  vector<string> fname = mmsapt->GetFeatureNames();
+  while (getline(spe_src,srcline))
+    {
+      UTIL_THROW_IF2(!getline(spe_trg,trgline), HERE 
+		     << ": missing data for online updates.");
+      UTIL_THROW_IF2(!getline(spe_aln,alnline), HERE 
+		     << ": missing data for online updates.");
+      cout << string(80,'-') << "\n" << srcline << "\n" << trgline << "\n" << endl;
+
+      // cout << srcline << " " << HERE << endl;
+      Sentence snt;
+      istringstream buf(srcline+"\n");
+      if (!snt.Read(buf,ifo)) break;
+      // cout << Phrase(snt) << endl;
+      int dynprovidx = -1; // index of the last feature named "prov-1.*"; stays -1 when there is none
+      for (size_t i = 0; i < fname.size(); ++i)
+	{
+	  if (fname[i].substr(0,7) == "prov-1.") 
+	    dynprovidx = i;
+	}
+      cout << endl;
+      for (size_t i = 0; i < snt.GetSize(); ++i)
+	{
+	  for (size_t k = i; k < snt.GetSize(); ++k)
+	    {
+	      Phrase p = snt.GetSubString(WordsRange(i,k));
+	      if (!mmsapt->PrefixExists(p)) break;
+	      TargetPhraseCollection const* trg = PT->GetTargetPhraseCollectionLEGACY(p);
+	      if (!trg || !trg->GetSize()) continue;
+	      // per-span state: header printed yet? any entry with a positive "prov-1." score?
+	      bool header_done = false;
+	      bool has_dynamic_match = vlevel == "all" || vlevel == "ALL";
+	      vector<size_t> order; order.reserve(trg->GetSize()); 
+	      size_t stop = trg->GetSize();
+	      // o2: entry indices sorted by descending weighted score
+	      vector<size_t> o2(trg->GetSize());
+	      for (size_t n = 0; n < stop; ++n) o2[n] = n;
+	      sort(o2.begin(),o2.end(),TargetPhraseIndexSorter(*trg));
+	      // first pass: choose the ranks to print
+	      for (size_t r = 0; r < stop; ++r) // r for rank
+		{
+		  if (vlevel != "ALL")
+		    {
+		      Phrase const& phr = static_cast<Phrase const&>(*(*trg)[o2[r]]);
+		      ostringstream buf; buf << phr; 
+		      string tphrase = buf.str(); 
+		      tphrase.erase(tphrase.size()-1); // drop the last printed character (presumably a trailing space -- TODO confirm)
+		      size_t s = trgline.find(tphrase);
+		      if (s == string::npos) continue;
+		      size_t e = s + tphrase.size();
+		      if ((s && trgline[s-1] != ' ') || (e < trgline.size() && trgline[e] != ' '))
+			continue; // first occurrence is not delimited by spaces / line ends
+		    }
+		  order.push_back(r);
+		  if (!has_dynamic_match)
+		    {
+		      ScoreComponentCollection const& scc = (*trg)[o2[r]]->GetScoreBreakdown();
+		      ScoreComponentCollection::IndexPair idx = scc.GetIndexes(PT);
+		      FVector const& scores = scc.GetScoresVector();
+		      has_dynamic_match = dynprovidx >= 0 && scores[idx.first + dynprovidx] > 0; // never index with -1
+		    }
+		}
+	      if ((vlevel == "alt" || vlevel == "new") && !has_dynamic_match)
+		continue;
+	      // NOTE(review): this continue and the empty-collection one above skip mmsapt->Release(trg) -- confirm intended
+	      // second pass: print the chosen entries with their feature values
+	      BOOST_FOREACH(size_t const& r, order)
+		{
+		  ScoreComponentCollection const& scc = (*trg)[o2[r]]->GetScoreBreakdown();
+		  ScoreComponentCollection::IndexPair idx = scc.GetIndexes(PT);
+		  FVector const& scores = scc.GetScoresVector();
+		  float wscore = scc.GetWeightedScore();
+		  if (vlevel == "new" && (dynprovidx < 0 || scores[idx.first + dynprovidx] == 0)) 
+		    continue;
+		  if (!header_done)
+		    {
+		      cout << endl;
+		      if (trg->GetSize() == 1)
+			cout << p << " (1 translation option)" << endl;
+		      else
+			cout << p << " (" << trg->GetSize() << " translation options)" << endl;
+		      header_done = true;
+		    }
+		  Phrase const& phr = static_cast<Phrase const&>(*(*trg)[o2[r]]);
+		  cout << setw(3) << r+1 << " " << phr << endl;
+		  cout << "   ";
+		  BOOST_FOREACH(string const& fn, fname)
+		    cout << " " << format("%10.10s") % fn;
+		  cout << endl;
+		  cout << "   ";
+		  for (size_t x = idx.first; x < idx.second; ++x)
+		    {
+		      size_t j = x-idx.first;
+		      float f = (mmsapt && mmsapt->isLogVal(j)) ? exp(scores[x]) : scores[x];
+		      string fmt = (mmsapt && mmsapt->isInteger(j)) ? "%10d" : "%10.8f";
+		      if (fname[j].substr(0,3) == "lex") fmt = "%10.3e";
+		      if (fname[j].substr(0,7) == "prov-1.") 
+			{
+			  f = round(f/(1-f)); // presumably recovers a raw count from the stored value -- TODO confirm
+			  fmt = "%10d";
+			}
+		      cout << " " << format(fmt) % (mmsapt->isInteger(j) ? round(f) : f);
+		    }
+		  cout << " " << format("%10.3e") % exp(wscore) 
+		       << " " << format("%10.3e") % exp((*trg)[o2[r]]->GetFutureScore()) << endl;
+		}
+	      mmsapt->Release(trg);
+	      continue;
+	    }
+	}
+      mmsapt->add(srcline,trgline,alnline);
+    }
+  // all input triples consumed (or a sentence failed to parse)
+  exit(0);
+}
+  
+  
+
diff --git a/moses/TranslationModel/UG/spe-check-coverage2.cc b/moses/TranslationModel/UG/spe-check-coverage2.cc
new file mode 100644
index 0000000000..fa9ce1c85b
--- /dev/null
+++ b/moses/TranslationModel/UG/spe-check-coverage2.cc
@@ -0,0 +1,76 @@
+#include "mmsapt.h"
+#include "moses/TranslationModel/PhraseDictionaryTreeAdaptor.h"
+#include "moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.h"
+#include <boost/foreach.hpp>
+#include <boost/format.hpp>
+#include <boost/tokenizer.hpp>
+#include <boost/shared_ptr.hpp>
+#include <algorithm>
+#include <iostream>
+
+using namespace Moses;
+using namespace bitext;
+using namespace std;
+using namespace boost;
+
+typedef L2R_Token<SimpleWordId> Token; // token type shared by the bitext typedefs below
+typedef mmBitext<Token> mmbitext;      // type of the global bitext bg
+typedef imBitext<Token> imbitext;      // type of the bitext that main() builds from the input files
+typedef Bitext<Token>::iter iter;      // iterator type taken by show() and dump()
+// Global bitext: opened in main(), consulted by show() for counts and vocabulary.
+mmbitext bg;
+
+// Print "<count in bg.I2, or 12 blanks if f is not fully matched there> <count of f> <text of f>".
+// NOTE(review): the lookup uses index bg.I2 while the text is rendered with vocabulary bg.V1 - confirm.
+void show(ostream& out, iter& f)
+{
+  iter bgMatch(bg.I2.get(), f.getToken(0), f.size());
+  if (bgMatch.size() != f.size()) out << string(12,' ');
+  else out << setw(12) << int(round(bgMatch.approxOccurrenceCount()));
+  int const fgCount = int(round(f.approxOccurrenceCount()));
+  out << " " << setw(5) << fgCount << " ";
+  out << f.str(bg.V1.get()) << endl;
+}
+
+
+// Recursive walk below f; shows f when it is non-empty, occurs more than once and outcounts the node reached by down().
+void dump(ostream& out, iter& f)
+{
+  float refCount = 0;
+  if (f.size()) refCount = f.approxOccurrenceCount();
+  if (f.down())
+    {
+      refCount = f.approxOccurrenceCount(); // count at the position reached by down()
+      do dump(out,f); while (f.over());
+      f.up();
+    }
+  bool const wanted = f.size() && refCount < f.approxOccurrenceCount() && f.approxOccurrenceCount() > 1;
+  if (wanted) show(out,f);
+}
+
+
+// Append every line of the file fname to dest; exits with status 1 if it cannot be opened.
+void read_data(string fname, vector<string>& dest)
+{
+  ifstream in(fname.c_str());
+  if (!in) { cerr << "cannot open " << fname << endl; exit(1); }
+  string line;
+  while (getline(in,line)) dest.push_back(line);
+}
+
+// Expects six arguments: three for bg.open(), then the source, target and alignment files.
+int main(int argc, char* argv[])
+{
+  if (argc < 7) { cerr << "usage: spe-check-coverage2 <bg-arg1> <bg-arg2> <bg-arg3> <src> <trg> <aln>" << endl; exit(1); }
+  bg.open(argv[1],argv[2],argv[3]);
+  sptr<imbitext> fg(new imbitext(bg.V1,bg.V2));
+  vector<string> src,trg,aln;
+  read_data(argv[4],src); read_data(argv[5],trg); read_data(argv[6],aln);
+  fg = fg->add(src,trg,aln);
+  iter mfg(fg->I1.get());
+  dump(cout,mfg);
+  exit(0);
+}
+  
+  
+

From 28d64e23396cba53a83b75e01e9977db636e2285 Mon Sep 17 00:00:00 2001
From: Ulrich Germann <ugermann@inf.ed.ac.uk>
Date: Wed, 9 Jul 2014 02:40:40 +0100
Subject: [PATCH 33/84] Simulated post-editing sessions feeding reference and
 alignment into the system after automatic translation of each source
 sentence.

---
 moses-cmd/simulate-pe.cc | 856 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 856 insertions(+)
 create mode 100644 moses-cmd/simulate-pe.cc

diff --git a/moses-cmd/simulate-pe.cc b/moses-cmd/simulate-pe.cc
new file mode 100644
index 0000000000..e88c1e4635
--- /dev/null
+++ b/moses-cmd/simulate-pe.cc
@@ -0,0 +1,856 @@
+// Fork of Main.cpp, to simulate post-editing sessions.
+// Written by Ulrich Germann.
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2009 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#include <boost/algorithm/string/predicate.hpp>
+#include <boost/filesystem.hpp>
+#include <boost/iostreams/device/file.hpp>
+#include <boost/iostreams/filter/bzip2.hpp>
+#include <boost/iostreams/filter/gzip.hpp>
+#include <boost/iostreams/filtering_stream.hpp>
+#include <boost/foreach.hpp>
+
+#include <exception>
+#include <fstream>
+#include <sstream>
+#include <vector>
+
+#include "util/usage.hh"
+#include "util/exception.hh"
+#include "moses/Util.h"
+#include "moses/TranslationModel/UG/mmsapt.h"
+#include "moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.h"
+
+#ifdef WIN32
+// Include Visual Leak Detector
+//#include <vld.h>
+#endif
+
+#include "TranslationAnalysis.h"
+#include "IOWrapper.h"
+#include "mbr.h"
+
+#include "moses/Hypothesis.h"
+#include "moses/Manager.h"
+#include "moses/StaticData.h"
+#include "moses/Util.h"
+#include "moses/Timer.h"
+#include "moses/ThreadPool.h"
+#include "moses/OutputCollector.h"
+#include "moses/TranslationModel/PhraseDictionary.h"
+#include "moses/FF/StatefulFeatureFunction.h"
+#include "moses/FF/StatelessFeatureFunction.h"
+
+#ifdef HAVE_PROTOBUF
+#include "hypergraph.pb.h"
+#endif
+
+using namespace std;
+using namespace Moses;
+using namespace MosesCmd;
+
+namespace MosesCmd
+{
+// number of digits after the decimal point when floats are written in fixed notation (see fix())
+static const size_t PRECISION = 3;
+
+/** Enforce rounding: set the fixed-notation flag and show `size` digits after the decimal point. */
+void fix(std::ostream& stream, size_t size)
+{
+  stream.flags(stream.flags() | std::ios::fixed);
+  stream.precision(static_cast<std::streamsize>(size));
+}
+
+/** Translates a sentence.
+  * - calls the search (Manager)
+  * - applies the decision rule
+  * - outputs best translation and additional reporting
+  **/
+class TranslationTask : public Task
+{
+
+public:
+
+  TranslationTask(size_t lineNumber,
+                  InputType* source, // owned by the task: deleted in ~TranslationTask()
+		  OutputCollector* outputCollector, // collectors are stored as given; this class never deletes them
+		  OutputCollector* nbestCollector,
+                  OutputCollector* latticeSamplesCollector,
+                  OutputCollector* wordGraphCollector, 
+		  OutputCollector* searchGraphCollector,
+                  OutputCollector* detailedTranslationCollector,
+                  OutputCollector* alignmentInfoCollector,
+                  OutputCollector* unknownsCollector,
+                  bool outputSearchGraphSLF,
+                  bool outputSearchGraphHypergraph) 
+    : m_source(source)
+    , m_lineNumber(lineNumber)
+    , m_outputCollector(outputCollector)
+    , m_nbestCollector(nbestCollector)
+    , m_latticeSamplesCollector(latticeSamplesCollector)
+    , m_wordGraphCollector(wordGraphCollector)
+    , m_searchGraphCollector(searchGraphCollector)
+    , m_detailedTranslationCollector(detailedTranslationCollector)
+    , m_alignmentInfoCollector(alignmentInfoCollector)
+    , m_unknownsCollector(unknownsCollector)
+    , m_outputSearchGraphSLF(outputSearchGraphSLF)
+    , m_outputSearchGraphHypergraph(outputSearchGraphHypergraph) 
+  { } // NOTE(review): m_alignmentStream is left uninitialized by this constructor
+
+  /** Translate one sentence
+   * gets called by main function implemented at end of this source file */
+  void Run() {
+    // shorthand for "global data"
+    const StaticData &staticData = StaticData::Instance();
+
+    // input sentence
+    Sentence sentence; // NOTE(review): not referenced anywhere else in Run()
+
+    // report wall time spent on translation
+    Timer translationTime;
+    translationTime.start();
+
+    // report thread number
+#if defined(WITH_THREADS) && defined(BOOST_HAS_PTHREADS)
+    TRACE_ERR("Translating line " << m_lineNumber << "  in thread id " << pthread_self() << std::endl);
+#endif
+
+
+    // execute the translation
+    // note: this executes the search, resulting in a search graph
+    //       we still need to apply the decision rule (MAP, MBR, ...)
+    Timer initTime;
+    initTime.start();
+    Manager manager(m_lineNumber, *m_source,staticData.GetSearchAlgorithm());
+    VERBOSE(1, "Line " << m_lineNumber << ": Initialize search took " << initTime << " seconds total" << endl);
+    manager.ProcessSentence();
+
+    // we are done with search, let's look what we got
+    Timer additionalReportingTime;
+    additionalReportingTime.start();
+
+    // output word graph
+    if (m_wordGraphCollector) {
+      ostringstream out;
+      fix(out,PRECISION);
+      manager.GetWordGraph(m_lineNumber, out);
+      m_wordGraphCollector->Write(m_lineNumber, out.str());
+    }
+
+    // output search graph
+    if (m_searchGraphCollector) {
+      ostringstream out;
+      fix(out,PRECISION);
+      manager.OutputSearchGraph(m_lineNumber, out);
+      m_searchGraphCollector->Write(m_lineNumber, out.str());
+
+#ifdef HAVE_PROTOBUF
+      if (staticData.GetOutputSearchGraphPB()) {
+        ostringstream sfn;
+        sfn << staticData.GetParam("output-search-graph-pb")[0] << '/' << m_lineNumber << ".pb" << ends;
+        string fn = sfn.str();
+        VERBOSE(2, "Writing search graph to " << fn << endl);
+        fstream output(fn.c_str(), ios::trunc | ios::binary | ios::out);
+        manager.SerializeSearchGraphPB(m_lineNumber, output);
+      }
+#endif
+    }
+
+    // Output search graph in HTK standard lattice format (SLF)
+    if (m_outputSearchGraphSLF) {
+      stringstream fileName;
+      fileName << staticData.GetParam("output-search-graph-slf")[0] << "/" << m_lineNumber << ".slf";
+      std::ofstream *file = new std::ofstream;
+      file->open(fileName.str().c_str());
+      if (file->is_open() && file->good()) {
+        ostringstream out;
+        fix(out,PRECISION);
+        manager.OutputSearchGraphAsSLF(m_lineNumber, out);
+        *file << out.str();
+        file -> flush();
+      } else {
+        TRACE_ERR("Cannot output HTK standard lattice for line " << m_lineNumber << " because the output file is not open or not ready for writing" << std::endl);
+      }
+      delete file; // destroying the ofstream also closes the file
+    }
+
+    // Output search graph in hypergraph format for Kenneth Heafield's lazy hypergraph decoder
+    if (m_outputSearchGraphHypergraph) {
+
+      vector<string> hypergraphParameters = staticData.GetParam("output-search-graph-hypergraph");
+
+      bool appendSuffix;
+      if (hypergraphParameters.size() > 0 && hypergraphParameters[0] == "true") {
+        appendSuffix = true;
+      } else {
+        appendSuffix = false;
+      }
+
+      string compression;
+      if (hypergraphParameters.size() > 1) {
+        compression = hypergraphParameters[1];
+      } else {
+        compression = "txt";
+      }
+
+      string hypergraphDir;
+      if ( hypergraphParameters.size() > 2 ) {
+        hypergraphDir = hypergraphParameters[2];
+      } else {
+        string nbestFile = staticData.GetNBestFilePath();
+        if ( ! nbestFile.empty() && nbestFile!="-" && !boost::starts_with(nbestFile,"/dev/stdout") ) {
+          boost::filesystem::path nbestPath(nbestFile);
+
+          // In the Boost filesystem API version 2,
+          //   which was the default prior to Boost 1.46,
+          //   the filename() method returned a string.
+          //
+          // In the Boost filesystem API version 3,
+          //   which is the default starting with Boost 1.46,
+          //   the filename() method returns a path object.
+          //
+          // To get a string from the path object,
+          //   the native() method must be called.
+          //	  hypergraphDir = nbestPath.parent_path().filename()
+          //#if BOOST_VERSION >= 104600
+          //	    .native()
+          //#endif
+          //;
+
+          // Hopefully the following compiles under all versions of Boost.
+          //
+          // If this line gives you compile errors,
+          //   contact Lane Schwartz on the Moses mailing list
+          hypergraphDir = nbestPath.parent_path().string();
+
+        } else {
+          stringstream hypergraphDirName;
+          hypergraphDirName << boost::filesystem::current_path().string() << "/hypergraph";
+          hypergraphDir = hypergraphDirName.str();
+        }
+      }
+
+      if ( ! boost::filesystem::exists(hypergraphDir) ) {
+        boost::filesystem::create_directory(hypergraphDir);
+      }
+
+      if ( ! boost::filesystem::exists(hypergraphDir) ) {
+        TRACE_ERR("Cannot output hypergraphs to " << hypergraphDir << " because the directory does not exist" << std::endl);
+      } else if ( ! boost::filesystem::is_directory(hypergraphDir) ) {
+        TRACE_ERR("Cannot output hypergraphs to " << hypergraphDir << " because that path exists, but is not a directory" << std::endl);
+      } else {
+        stringstream fileName;
+        fileName << hypergraphDir << "/" << m_lineNumber;
+        if ( appendSuffix ) {
+          fileName << "." << compression;
+        }
+        boost::iostreams::filtering_ostream *file 
+	  = new boost::iostreams::filtering_ostream;
+
+        if ( compression == "gz" ) {
+          file->push( boost::iostreams::gzip_compressor() );
+        } else if ( compression == "bz2" ) {
+          file->push( boost::iostreams::bzip2_compressor() );
+        } else if ( compression != "txt" ) {
+          TRACE_ERR("Unrecognized hypergraph compression format (" 
+		    << compression 
+		    << ") - using uncompressed plain txt" << std::endl);
+          compression = "txt";
+        }
+
+        file->push( boost::iostreams::file_sink(fileName.str(), ios_base::out) );
+
+        if (file->is_complete() && file->good()) {
+          fix(*file,PRECISION);
+          manager.OutputSearchGraphAsHypergraph(m_lineNumber, *file);
+          file -> flush();
+        } else {
+          TRACE_ERR("Cannot output hypergraph for line " << m_lineNumber 
+		    << " because the output file " << fileName.str() 
+		    << " is not open or not ready for writing" 
+		    << std::endl);
+        }
+        file -> pop();
+        delete file;
+      }
+    }
+    additionalReportingTime.stop(); // restarted below around PrintAllDerivations and after the decision rule
+
+    // apply decision rule and output best translation(s)
+    if (m_outputCollector) {
+      ostringstream out;
+      ostringstream debug;
+      fix(debug,PRECISION);
+
+      // all derivations - send them to debug stream
+      if (staticData.PrintAllDerivations()) {
+        additionalReportingTime.start();
+        manager.PrintAllDerivations(m_lineNumber, debug);
+        additionalReportingTime.stop();
+      }
+
+      Timer decisionRuleTime;
+      decisionRuleTime.start();
+
+      // MAP decoding: best hypothesis
+      const Hypothesis* bestHypo = NULL;
+      if (!staticData.UseMBR()) {
+        bestHypo = manager.GetBestHypothesis();
+        if (bestHypo) {
+          if (StaticData::Instance().GetOutputHypoScore()) {
+            out << bestHypo->GetTotalScore() << ' ';
+          }
+          if (staticData.IsPathRecoveryEnabled()) {
+            OutputInput(out, bestHypo);
+            out << "||| ";
+          }
+          if (staticData.GetParam("print-id").size() && Scan<bool>(staticData.GetParam("print-id")[0]) ) {
+            out << m_source->GetTranslationId() << " ";
+          }
+
+	  if (staticData.GetReportSegmentation() == 2) {
+	    manager.GetOutputLanguageModelOrder(out, bestHypo);
+	  }
+          OutputBestSurface(
+            out,
+            bestHypo,
+            staticData.GetOutputFactorOrder(),
+            staticData.GetReportSegmentation(),
+            staticData.GetReportAllFactors());
+          if (staticData.PrintAlignmentInfo()) {
+            out << "||| ";
+            OutputAlignment(out, bestHypo);
+          }
+
+          OutputAlignment(m_alignmentInfoCollector, m_lineNumber, bestHypo);
+          IFVERBOSE(1) {
+            debug << "BEST TRANSLATION: " << *bestHypo << endl;
+          }
+        } else {
+          VERBOSE(1, "NO BEST TRANSLATION" << endl);
+        }
+
+        out << endl;
+      }
+
+      // MBR decoding (n-best MBR, lattice MBR, consensus)
+      else {
+        // we first need the n-best translations
+        size_t nBestSize = staticData.GetMBRSize();
+        if (nBestSize <= 0) { // nBestSize is a size_t, so this can only be true for 0
+          cerr << "ERROR: negative size for number of MBR candidate translations not allowed (option mbr-size)" << endl;
+          exit(1);
+        }
+        TrellisPathList nBestList;
+        manager.CalcNBest(nBestSize, nBestList,true);
+        VERBOSE(2,"size of n-best: " << nBestList.GetSize() << " (" << nBestSize << ")" << endl);
+        IFVERBOSE(2) {
+          PrintUserTime("calculated n-best list for (L)MBR decoding");
+        }
+
+        // lattice MBR
+        if (staticData.UseLatticeMBR()) {
+          if (m_nbestCollector) {
+            //lattice mbr nbest
+            vector<LatticeMBRSolution> solutions;
+            size_t n  = min(nBestSize, staticData.GetNBestSize());
+            getLatticeMBRNBest(manager,nBestList,solutions,n);
+            ostringstream out; // local stream; shadows the outer 'out', which stays empty on this path
+            OutputLatticeMBRNBest(out, solutions,m_lineNumber);
+            m_nbestCollector->Write(m_lineNumber, out.str());
+          } else {
+            //Lattice MBR decoding
+            vector<Word> mbrBestHypo = doLatticeMBR(manager,nBestList);
+            OutputBestHypo(mbrBestHypo, m_lineNumber, staticData.GetReportSegmentation(),
+                           staticData.GetReportAllFactors(),out);
+            IFVERBOSE(2) {
+              PrintUserTime("finished Lattice MBR decoding");
+            }
+          }
+        }
+
+        // consensus decoding
+        else if (staticData.UseConsensusDecoding()) {
+          const TrellisPath &conBestHypo = doConsensusDecoding(manager,nBestList);
+          OutputBestHypo(conBestHypo, m_lineNumber,
+                         staticData.GetReportSegmentation(),
+                         staticData.GetReportAllFactors(),out);
+          OutputAlignment(m_alignmentInfoCollector, m_lineNumber, conBestHypo);
+          IFVERBOSE(2) {
+            PrintUserTime("finished Consensus decoding");
+          }
+        }
+
+        // n-best MBR decoding
+        else {
+          const Moses::TrellisPath &mbrBestHypo = doMBR(nBestList);
+          OutputBestHypo(mbrBestHypo, m_lineNumber,
+                         staticData.GetReportSegmentation(),
+                         staticData.GetReportAllFactors(),out);
+          OutputAlignment(m_alignmentInfoCollector, m_lineNumber, mbrBestHypo);
+          IFVERBOSE(2) {
+            PrintUserTime("finished MBR decoding");
+          }
+        }
+      }
+
+      // report best translation to output collector
+      m_outputCollector->Write(m_lineNumber,out.str(),debug.str());
+
+      decisionRuleTime.stop();
+      VERBOSE(1, "Line " << m_lineNumber << ": Decision rule took " << decisionRuleTime << " seconds total" << endl);
+    }
+
+    additionalReportingTime.start();
+
+    // output n-best list
+    if (m_nbestCollector && !staticData.UseLatticeMBR()) {
+      TrellisPathList nBestList;
+      ostringstream out;
+      manager.CalcNBest(staticData.GetNBestSize(), nBestList,staticData.GetDistinctNBest());
+      OutputNBest(out, nBestList, staticData.GetOutputFactorOrder(), m_lineNumber,
+                  staticData.GetReportSegmentation());
+      m_nbestCollector->Write(m_lineNumber, out.str());
+    }
+
+    //lattice samples
+    if (m_latticeSamplesCollector) {
+      TrellisPathList latticeSamples;
+      ostringstream out;
+      manager.CalcLatticeSamples(staticData.GetLatticeSamplesSize(), latticeSamples);
+      OutputNBest(out,latticeSamples, staticData.GetOutputFactorOrder(), m_lineNumber,
+                  staticData.GetReportSegmentation());
+      m_latticeSamplesCollector->Write(m_lineNumber, out.str());
+    }
+
+    // detailed translation reporting
+    if (m_detailedTranslationCollector) {
+      ostringstream out;
+      fix(out,PRECISION);
+      TranslationAnalysis::PrintTranslationAnalysis(out, manager.GetBestHypothesis());
+      m_detailedTranslationCollector->Write(m_lineNumber,out.str());
+    }
+
+    //list of unknown words
+    if (m_unknownsCollector) {
+      const vector<const Phrase*>& unknowns = manager.getSntTranslationOptions()->GetUnknownSources();
+      ostringstream out;
+      for (size_t i = 0; i < unknowns.size(); ++i) {
+        out << *(unknowns[i]);
+      }
+      out << endl;
+      m_unknownsCollector->Write(m_lineNumber, out.str());
+    }
+
+    // report additional statistics
+    manager.CalcDecoderStatistics();
+    VERBOSE(1, "Line " << m_lineNumber << ": Additional reporting took " << additionalReportingTime << " seconds total" << endl);
+    VERBOSE(1, "Line " << m_lineNumber << ": Translation took " << translationTime << " seconds total" << endl);
+    IFVERBOSE(2) {
+      PrintUserTime("Sentence Decoding Time:");
+    }
+  }
+
+  ~TranslationTask() {
+    delete m_source; // the task owns the input it was constructed with; nothing else is released here
+  }
+
+private:
+  InputType* m_source; // input to translate; deleted in the destructor
+  size_t m_lineNumber; // passed to the Manager and to every collector Write() call
+  OutputCollector* m_outputCollector; // receives the best translation plus debug text
+  OutputCollector* m_nbestCollector; // receives n-best lists (also lattice-MBR n-best)
+  OutputCollector* m_latticeSamplesCollector; // receives lattice samples
+  OutputCollector* m_wordGraphCollector; // receives the word graph
+  OutputCollector* m_searchGraphCollector; // receives the search graph
+  OutputCollector* m_detailedTranslationCollector; // receives the translation analysis
+  OutputCollector* m_alignmentInfoCollector; // handed to OutputAlignment()
+  OutputCollector* m_unknownsCollector; // receives the unknown source phrases
+  bool m_outputSearchGraphSLF; // when set, Run() writes <dir>/<line>.slf
+  bool m_outputSearchGraphHypergraph; // when set, Run() writes a hypergraph file per line
+  std::ofstream *m_alignmentStream; // NOTE(review): never initialized or used in this class
+
+
+};
+
+/** Print "<description>= w0 w1 ..." for one feature function on a single stdout line. */
+static void PrintFeatureWeight(const FeatureFunction* ff)
+{
+  cout << ff->GetScoreProducerDescription() << "=";
+  const size_t count = ff->GetNumScoreComponents();
+  const vector<float> weights = StaticData::Instance().GetAllWeights().GetScoresForProducer(ff);
+  for (size_t k = 0; k != count; ++k)
+    cout << " " << weights[k];
+  cout << endl;
+}
+
+/** Print every feature function's weights to stdout with 6 decimals:
+ *  stateful features first, then stateless ones. A feature that is not
+ *  tuneable is listed by name followed by " UNTUNEABLE". */
+static void ShowWeights()
+{
+  //TODO: Find a way of ensuring this order is synced with the nbest
+  fix(cout,6);
+  typedef vector<const StatelessFeatureFunction*> StatelessList;
+  typedef vector<const StatefulFeatureFunction*> StatefulList;
+  const StatelessList& stateless = StatelessFeatureFunction::GetStatelessFeatureFunctions();
+  const StatefulList& stateful = StatefulFeatureFunction::GetStatefulFeatureFunctions();
+  for (StatefulList::const_iterator it = stateful.begin(); it != stateful.end(); ++it) {
+    if (!(*it)->IsTuneable()) {
+      cout << (*it)->GetScoreProducerDescription() << " UNTUNEABLE" << endl;
+      continue;
+    }
+    PrintFeatureWeight(*it);
+  }
+  for (StatelessList::const_iterator it = stateless.begin(); it != stateless.end(); ++it) {
+    if (!(*it)->IsTuneable()) {
+      cout << (*it)->GetScoreProducerDescription() << " UNTUNEABLE" << endl;
+      continue;
+    }
+    PrintFeatureWeight(*it);
+  }
+}
+
+/** Write one "<name>=<weight>" line for ff, or "<name><i>=<weight>" per component when it has several.
+ *  Returns index advanced by the component count; a feature with no components is rejected via UTIL_THROW2. */
+size_t OutputFeatureWeightsForHypergraph(size_t index, const FeatureFunction* ff, std::ostream &outputSearchGraphStream)
+{
+  const size_t numScoreComps = ff->GetNumScoreComponents();
+  if (numScoreComps == 0) {
+    UTIL_THROW2("Sparse features are not yet supported when outputting hypergraph format");
+  }
+  vector<float> values = StaticData::Instance().GetAllWeights().GetScoresForProducer(ff);
+  if (numScoreComps == 1) {
+    outputSearchGraphStream << ff->GetScoreProducerDescription()
+                            << "=" << values[0] << endl;
+    return index + numScoreComps;
+  }
+  for (size_t k = 0; k < numScoreComps; ++k) {
+    outputSearchGraphStream << ff->GetScoreProducerDescription()
+                            << k << "=" << values[k] << endl;
+  }
+  return index + numScoreComps;
+}
+
+/** Write the weights of all dense features to outputSearchGraphStream in
+ *  fixed notation with 6 decimals. Features are visited in this order:
+ *  stateful, stateless, phrase dictionaries, generation dictionaries.
+ *  NOTE(review): if the dictionaries are also registered as stateless
+ *  feature functions they are written twice - confirm. */
+void OutputFeatureWeightsForHypergraph(std::ostream &outputSearchGraphStream)
+{
+  outputSearchGraphStream.precision(6);
+  outputSearchGraphStream.setf(std::ios::fixed);
+
+  const vector<const StatelessFeatureFunction*>& stateless = StatelessFeatureFunction::GetStatelessFeatureFunctions();
+  const vector<const StatefulFeatureFunction*>& stateful = StatefulFeatureFunction::GetStatefulFeatureFunctions();
+  size_t featureIndex = 1; // threaded through the per-feature overload; not read after the last call
+  for (size_t k = 0; k != stateful.size(); ++k)
+    featureIndex = OutputFeatureWeightsForHypergraph(featureIndex, stateful[k], outputSearchGraphStream);
+    /*
+    if (slf[i]->GetScoreProducerWeightShortName() != "u" &&
+          slf[i]->GetScoreProducerWeightShortName() != "tm" &&
+          slf[i]->GetScoreProducerWeightShortName() != "I" &&
+          slf[i]->GetScoreProducerWeightShortName() != "g")
+    */
+  for (size_t k = 0; k != stateless.size(); ++k)
+    featureIndex = OutputFeatureWeightsForHypergraph(featureIndex, stateless[k], outputSearchGraphStream);
+
+  const vector<PhraseDictionary*>& phraseDicts = PhraseDictionary::GetColl();
+  for (size_t k = 0; k != phraseDicts.size(); ++k)
+    featureIndex = OutputFeatureWeightsForHypergraph(featureIndex, phraseDicts[k], outputSearchGraphStream);
+
+  const vector<GenerationDictionary*>& generationDicts = GenerationDictionary::GetColl();
+  for (size_t k = 0; k != generationDicts.size(); ++k)
+    featureIndex = OutputFeatureWeightsForHypergraph(featureIndex, generationDicts[k], outputSearchGraphStream);
+}
+
+
+} //namespace
+
+/** main function of the command line version of the decoder **/
+int main(int argc, char** argv)
+{
+  try {
+
+#ifdef HAVE_PROTOBUF
+    GOOGLE_PROTOBUF_VERIFY_VERSION;
+#endif
+    
+    // echo command line, if verbose
+    IFVERBOSE(1) {
+      TRACE_ERR("command: ");
+      for(int i=0; i<argc; ++i) TRACE_ERR(argv[i]<<" ");
+      TRACE_ERR(endl);
+    }
+
+    // set number of significant decimals in output
+    fix(cout,PRECISION);
+    fix(cerr,PRECISION);
+
+    vector<pair<string,int> > argfilter(4);
+    argfilter[0] = std::make_pair(string("--spe-src"),1);
+    argfilter[1] = std::make_pair(string("--spe-trg"),1);
+    argfilter[2] = std::make_pair(string("--spe-aln"),1);
+
+    char** my_args; int my_acnt;
+    char** mo_args; int mo_acnt;
+    filter_arguments(argc, argv, mo_acnt, &mo_args, my_acnt, &my_args, argfilter);
+
+    ifstream spe_src,spe_trg,spe_aln;
+    // open the source, target and alignment files named by the --spe-* options
+    for (int i = 0; i < my_acnt; i += 2)
+      {
+	if (!strcmp(my_args[i],"--spe-src"))
+	  spe_src.open(my_args[i+1]);
+	else if (!strcmp(my_args[i],"--spe-trg"))
+	  spe_trg.open(my_args[i+1]);
+	else if (!strcmp(my_args[i],"--spe-aln"))
+	  spe_aln.open(my_args[i+1]);
+      }
+
+    // load all the settings into the Parameter class
+    // (stores them as strings, or array of strings)
+    Parameter params;
+    if (!params.LoadParam(mo_acnt,mo_args)) {
+      exit(1);
+    }
+
+
+    // initialize all "global" variables, which are stored in StaticData
+    // note: this also loads models such as the language model, etc.
+    if (!StaticData::LoadDataStatic(&params, argv[0])) {
+      exit(1);
+    }
+
+    // setting "-show-weights" -> just dump out weights and exit
+    if (params.isParamSpecified("show-weights")) {
+      ShowWeights();
+      exit(0);
+    }
+
+    // shorthand for accessing information in StaticData
+    const StaticData& staticData = StaticData::Instance();
+
+
+    //initialise random numbers
+    srand(time(NULL));
+
+    // set up read/writing class
+    IOWrapper* ioWrapper = GetIOWrapper(staticData);
+    if (!ioWrapper) {
+      cerr << "Error; Failed to create IO object" << endl;
+      exit(1);
+    }
+
+    // check on weights
+    const ScoreComponentCollection& weights = staticData.GetAllWeights();
+    IFVERBOSE(2) {
+      TRACE_ERR("The global weight vector looks like this: ");
+      TRACE_ERR(weights);
+      TRACE_ERR("\n");
+    }
+    if (staticData.GetOutputSearchGraphHypergraph()) {
+      ofstream* weightsOut = new std::ofstream;
+      stringstream weightsFilename;
+      if (staticData.GetParam("output-search-graph-hypergraph").size() > 3) {
+        weightsFilename << staticData.GetParam("output-search-graph-hypergraph")[3];
+      } else {
+        string nbestFile = staticData.GetNBestFilePath();
+        if ( ! nbestFile.empty() && nbestFile!="-" && !boost::starts_with(nbestFile,"/dev/stdout") ) {
+          boost::filesystem::path nbestPath(nbestFile);
+          weightsFilename << nbestPath.parent_path().filename() << "/weights";
+        } else {
+          weightsFilename << boost::filesystem::current_path().string() << "/hypergraph/weights";
+        }
+      }
+      boost::filesystem::path weightsFilePath(weightsFilename.str());
+      if ( ! boost::filesystem::exists(weightsFilePath.parent_path()) ) {
+        boost::filesystem::create_directory(weightsFilePath.parent_path());
+      }
+      TRACE_ERR("The weights file is " << weightsFilename.str() << "\n");
+      weightsOut->open(weightsFilename.str().c_str());
+      OutputFeatureWeightsForHypergraph(*weightsOut);
+      weightsOut->flush();
+      weightsOut->close();
+      delete weightsOut;
+    }
+
+
+    // initialize output streams
+    // note: we can't just write to STDOUT or files
+    // because multithreading may return sentences in shuffled order
+    auto_ptr<OutputCollector> outputCollector; // for translations
+    auto_ptr<OutputCollector> nbestCollector;  // for n-best lists
+    auto_ptr<OutputCollector> latticeSamplesCollector; //for lattice samples
+    auto_ptr<ofstream> nbestOut;
+    auto_ptr<ofstream> latticeSamplesOut;
+    size_t nbestSize = staticData.GetNBestSize();
+    string nbestFile = staticData.GetNBestFilePath();
+    bool output1best = true;
+    if (nbestSize) {
+      if (nbestFile == "-" || nbestFile == "/dev/stdout") {
+        // nbest to stdout, no 1-best
+        nbestCollector.reset(new OutputCollector());
+        output1best = false;
+      } else {
+        // nbest to file, 1-best to stdout
+        nbestOut.reset(new ofstream(nbestFile.c_str()));
+        if (!nbestOut->good()) {
+          TRACE_ERR("ERROR: Failed to open " << nbestFile << " for nbest lists" << endl);
+          exit(1);
+        }
+        nbestCollector.reset(new OutputCollector(nbestOut.get()));
+      }
+    }
+    size_t latticeSamplesSize = staticData.GetLatticeSamplesSize();
+    string latticeSamplesFile = staticData.GetLatticeSamplesFilePath();
+    if (latticeSamplesSize) {
+      if (latticeSamplesFile == "-" || latticeSamplesFile == "/dev/stdout") {
+        latticeSamplesCollector.reset(new OutputCollector());
+        output1best = false;
+      } else {
+        latticeSamplesOut.reset(new ofstream(latticeSamplesFile.c_str()));
+        if (!latticeSamplesOut->good()) {
+          TRACE_ERR("ERROR: Failed to open " << latticeSamplesFile << " for lattice samples" << endl);
+          exit(1);
+        }
+        latticeSamplesCollector.reset(new OutputCollector(latticeSamplesOut.get()));
+      }
+    }
+    if (output1best) {
+      outputCollector.reset(new OutputCollector());
+    }
+
+    // initialize stream for word graph (aka: output lattice)
+    auto_ptr<OutputCollector> wordGraphCollector;
+    if (staticData.GetOutputWordGraph()) {
+      wordGraphCollector.reset(new OutputCollector(&(ioWrapper->GetOutputWordGraphStream())));
+    }
+
+    // initialize stream for search graph
+    // note: this is essentially the same as above, but in a different format
+    auto_ptr<OutputCollector> searchGraphCollector;
+    if (staticData.GetOutputSearchGraph()) {
+      searchGraphCollector.reset(new OutputCollector(&(ioWrapper->GetOutputSearchGraphStream())));
+    }
+
+    // initialize stream for details about the decoder run
+    auto_ptr<OutputCollector> detailedTranslationCollector;
+    if (staticData.IsDetailedTranslationReportingEnabled()) {
+      detailedTranslationCollector.reset(new OutputCollector(&(ioWrapper->GetDetailedTranslationReportingStream())));
+    }
+
+    // initialize stream for word alignment between input and output
+    auto_ptr<OutputCollector> alignmentInfoCollector;
+    if (!staticData.GetAlignmentOutputFile().empty()) {
+      alignmentInfoCollector.reset(new OutputCollector(ioWrapper->GetAlignmentOutputStream()));
+    }
+
+    //initialise stream for unknown (oov) words
+    auto_ptr<OutputCollector> unknownsCollector;
+    auto_ptr<ofstream> unknownsStream;
+    if (!staticData.GetOutputUnknownsFile().empty()) {
+      unknownsStream.reset(new ofstream(staticData.GetOutputUnknownsFile().c_str()));
+      if (!unknownsStream->good()) {
+        TRACE_ERR("Unable to open " << staticData.GetOutputUnknownsFile() << " for unknowns");
+        exit(1);
+      }
+      unknownsCollector.reset(new OutputCollector(unknownsStream.get()));
+    }
+
+#ifdef WITH_THREADS
+    ThreadPool pool(staticData.ThreadCount());
+#endif
+
+    // main loop over set of input sentences
+    InputType* source = NULL;
+    size_t lineCount = staticData.GetStartTranslationId();
+    while(ReadInput(*ioWrapper,staticData.GetInputType(),source)) {
+      IFVERBOSE(1) {
+        ResetUserTime();
+      }
+      // set up task of translating one sentence
+      TranslationTask* task =
+        new TranslationTask(lineCount,source, outputCollector.get(),
+                            nbestCollector.get(),
+                            latticeSamplesCollector.get(),
+                            wordGraphCollector.get(),
+                            searchGraphCollector.get(),
+                            detailedTranslationCollector.get(),
+                            alignmentInfoCollector.get(),
+                            unknownsCollector.get(),
+                            staticData.GetOutputSearchGraphSLF(),
+                            staticData.GetOutputSearchGraphHypergraph());
+      // execute task
+#ifdef WITH_THREADS
+      if (my_acnt)
+	{
+	  task->Run();
+	  delete task;
+	  string src,trg,aln;
+	  UTIL_THROW_IF2(!getline(spe_src,src), "[" << HERE << "] "
+			 << "missing update data for simulated post-editing.");
+	  UTIL_THROW_IF2(!getline(spe_trg,trg), "[" << HERE << "] "
+			 << "missing update data for simulated post-editing.");
+	  UTIL_THROW_IF2(!getline(spe_aln,aln), "[" << HERE << "] "
+			 << "missing update data for simulated post-editing.");
+	  BOOST_FOREACH (PhraseDictionary* pd, PhraseDictionary::GetColl())
+	    {
+	      Mmsapt* sapt = dynamic_cast<Mmsapt*>(pd);
+	      if (sapt) sapt->add(src,trg,aln);
+	      VERBOSE(1,"[" << HERE << " added src] " << src << endl);
+	      VERBOSE(1,"[" << HERE << " added trg] " << trg << endl);
+	      VERBOSE(1,"[" << HERE << " added aln] " << aln << endl);
+	    }
+	}
+      else pool.Submit(task);
+#else
+      task->Run();
+      delete task;
+#endif
+
+      source = NULL; //make sure it doesn't get deleted
+      ++lineCount;
+    }
+
+    // we are done, finishing up
+#ifdef WITH_THREADS
+    pool.Stop(true); //flush remaining jobs
+#endif
+
+    delete ioWrapper;
+    FeatureFunction::Destroy();
+
+  } catch (const std::exception &e) {
+    std::cerr << "Exception: " << e.what() << std::endl;
+    return EXIT_FAILURE;
+  }
+
+  IFVERBOSE(1) util::PrintUsage(std::cerr);
+
+#ifndef EXIT_RETURN
+  //This avoids calling destructors (which can take a long time)
+  exit(EXIT_SUCCESS);
+#else
+  return EXIT_SUCCESS;
+#endif
+}

From 4d41211c2cd6eb75c5a229c10e98fdfa1acff3b4 Mon Sep 17 00:00:00 2001
From: Ulrich Germann <ugermann@inf.ed.ac.uk>
Date: Wed, 9 Jul 2014 02:41:28 +0100
Subject: [PATCH 34/84] Major overhaul of Mmsapt. Reorganization of old and
 addition of new features in phrase tables. Many critical bug fixes.

---
 .gitignore                                    |    1 +
 Jamroot                                       |    4 +-
 OnDiskPt/queryOnDiskPt.cpp                    |    2 +-
 contrib/server/mosesserver.cpp                |   39 +-
 moses-cmd/Jamfile                             |    9 +-
 moses/BitmapContainer.cpp                     |   14 +-
 moses/Manager.cpp                             |    4 +-
 moses/TranslationModel/UG/Jamfile             |   35 +-
 .../program_options/ug_splice_arglist.cc      |   50 +
 .../program_options/ug_splice_arglist.h       |   18 +
 moses/TranslationModel/UG/mm/Jamfile          |   19 +-
 moses/TranslationModel/UG/mm/custom-pt.cc     |    9 +-
 moses/TranslationModel/UG/mm/ug_bitext.cc     |  183 +--
 moses/TranslationModel/UG/mm/ug_bitext.h      |   39 +-
 moses/TranslationModel/UG/mm/ug_im_ttrack.h   |   34 +-
 .../UG/mm/ug_lexical_phrase_scorer2.h         |   21 +-
 moses/TranslationModel/UG/mm/ug_phrasepair.cc |   97 ++
 moses/TranslationModel/UG/mm/ug_phrasepair.h  |  243 ++++
 .../UG/mm/ug_tsa_tree_iterator.h              |   46 +-
 moses/TranslationModel/UG/mmsapt.cpp          | 1034 ++++++++++-------
 moses/TranslationModel/UG/mmsapt.h            |   87 +-
 moses/TranslationModel/UG/mmsapt_align.cc     |  607 +++++-----
 .../UG/mmsapt_phrase_scorers.h                |  269 +----
 moses/TranslationModel/UG/ptable-lookup.cc    |   14 +-
 moses/TranslationModel/UG/sapt_phrase_key.h   |   13 +
 .../TranslationModel/UG/sapt_phrase_scorers.h |   12 +
 moses/TranslationModel/UG/sapt_pscore_base.h  |  103 ++
 .../UG/sapt_pscore_coherence.h                |   33 +
 moses/TranslationModel/UG/sapt_pscore_lex1.h  |   70 ++
 .../TranslationModel/UG/sapt_pscore_logcnt.h  |   65 ++
 moses/TranslationModel/UG/sapt_pscore_pbwd.h  |   58 +
 moses/TranslationModel/UG/sapt_pscore_pfwd.h  |   70 ++
 .../UG/sapt_pscore_provenance.h               |   47 +
 .../UG/sapt_pscore_rareness.h                 |   41 +
 .../UG/sapt_pscore_unaligned.h                |   67 ++
 moses/TranslationModel/UG/sim-pe.cc           |   83 ++
 moses/TranslationModel/UG/try-align.cc        |   47 +-
 .../fuzzy-match/FuzzyMatchWrapper.cpp         |    4 +-
 moses/TypeDef.h                               |    6 +-
 moses/Util.h                                  |    4 +
 scripts/server/moses.py                       |   10 +-
 scripts/server/sim-pe.py                      |   57 +-
 42 files changed, 2365 insertions(+), 1303 deletions(-)
 create mode 100644 moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.cc
 create mode 100644 moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.h
 create mode 100644 moses/TranslationModel/UG/mm/ug_phrasepair.cc
 create mode 100644 moses/TranslationModel/UG/mm/ug_phrasepair.h
 create mode 100644 moses/TranslationModel/UG/sapt_phrase_key.h
 create mode 100644 moses/TranslationModel/UG/sapt_phrase_scorers.h
 create mode 100644 moses/TranslationModel/UG/sapt_pscore_base.h
 create mode 100644 moses/TranslationModel/UG/sapt_pscore_coherence.h
 create mode 100644 moses/TranslationModel/UG/sapt_pscore_lex1.h
 create mode 100644 moses/TranslationModel/UG/sapt_pscore_logcnt.h
 create mode 100644 moses/TranslationModel/UG/sapt_pscore_pbwd.h
 create mode 100644 moses/TranslationModel/UG/sapt_pscore_pfwd.h
 create mode 100644 moses/TranslationModel/UG/sapt_pscore_provenance.h
 create mode 100644 moses/TranslationModel/UG/sapt_pscore_rareness.h
 create mode 100644 moses/TranslationModel/UG/sapt_pscore_unaligned.h
 create mode 100644 moses/TranslationModel/UG/sim-pe.cc

diff --git a/.gitignore b/.gitignore
index f870bed033..e7c37d86c6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -79,3 +79,4 @@ nbproject/
 mingw/MosesGUI/MosesGUI.e4p
 mingw/MosesGUI/_eric4project/
 
+contrib/m4m/merge-sorted
diff --git a/Jamroot b/Jamroot
index 283b4dd6f9..79ec39940f 100644
--- a/Jamroot
+++ b/Jamroot
@@ -152,13 +152,15 @@ build-projects lm util phrase-extract search moses moses/LM mert moses-cmd moses
 if [ option.get "with-mm" : : "yes" ]
 {
  alias mm :  
+  moses/TranslationModel/UG//spe-check-coverage2
   moses/TranslationModel/UG//ptable-lookup 
+  moses/TranslationModel/UG//sim-pe 
+  moses/TranslationModel/UG//spe-check-coverage 
   moses/TranslationModel/UG/mm//mtt-build 
   moses/TranslationModel/UG/mm//mtt-dump 
   moses/TranslationModel/UG/mm//symal2mam 
   moses/TranslationModel/UG/mm//mam2symal 
   moses/TranslationModel/UG/mm//mam_verify 
-  moses/TranslationModel/UG/mm//custom-pt 
   moses/TranslationModel/UG/mm//mmlex-build 
   moses/TranslationModel/UG/mm//mmlex-lookup 
   moses/TranslationModel/UG/mm//mtt-count-words 
diff --git a/OnDiskPt/queryOnDiskPt.cpp b/OnDiskPt/queryOnDiskPt.cpp
index a38fc5435f..77576d9565 100644
--- a/OnDiskPt/queryOnDiskPt.cpp
+++ b/OnDiskPt/queryOnDiskPt.cpp
@@ -22,7 +22,7 @@ int main(int argc, char **argv)
 {
   int tableLimit = 20;
   std::string ttable = "";
-  bool useAlignments = false;
+  // bool useAlignments = false;
 
   for(int i = 1; i < argc; i++) {
     if(!strcmp(argv[i], "-tlimit")) {
diff --git a/contrib/server/mosesserver.cpp b/contrib/server/mosesserver.cpp
index 1ff11f0ae2..f14111f331 100644
--- a/contrib/server/mosesserver.cpp
+++ b/contrib/server/mosesserver.cpp
@@ -4,6 +4,7 @@
 #include <algorithm>
 
 
+#include "moses/Util.h"
 #include "moses/ChartManager.h"
 #include "moses/Hypothesis.h"
 #include "moses/Manager.h"
@@ -59,7 +60,7 @@ class Updater: public xmlrpc_c::method
     if(add2ORLM_) {
       //updateORLM();
     }
-    cerr << "Done inserting\n";
+    XVERBOSE(1,"Done inserting\n");
     //PhraseDictionary* pdsa = (PhraseDictionary*) pdf->GetDictionary(*dummy);
     map<string, xmlrpc_c::value> retData;
     //*retvalP = xmlrpc_c::value_struct(retData);
@@ -120,17 +121,17 @@ class Updater: public xmlrpc_c::method
     if(si == params.end())
       throw xmlrpc_c::fault("Missing source sentence", xmlrpc_c::fault::CODE_PARSE);
     source_ = xmlrpc_c::value_string(si->second);
-    cerr << "source = " << source_ << endl;
+    XVERBOSE(1,"source = " << source_ << endl);
     si = params.find("target");
     if(si == params.end())
       throw xmlrpc_c::fault("Missing target sentence", xmlrpc_c::fault::CODE_PARSE);
     target_ = xmlrpc_c::value_string(si->second);
-    cerr << "target = " << target_ << endl;
+    XVERBOSE(1,"target = " << target_ << endl);
     si = params.find("alignment");
     if(si == params.end())
       throw xmlrpc_c::fault("Missing alignment", xmlrpc_c::fault::CODE_PARSE);
     alignment_ = xmlrpc_c::value_string(si->second);
-    cerr << "alignment = " << alignment_ << endl;
+    XVERBOSE(1,"alignment = " << alignment_ << endl);
     si = params.find("bounded");
     bounded_ = (si != params.end());
     si = params.find("updateORLM");
@@ -224,7 +225,7 @@ class Translator : public xmlrpc_c::method
     }
     const string source((xmlrpc_c::value_string(si->second)));
 
-    cerr << "Input: " << source << endl;
+    XVERBOSE(1,"Input: " << source << endl);
     si = params.find("align");
     bool addAlignInfo = (si != params.end());
     si = params.find("word-align");
@@ -287,13 +288,13 @@ class Translator : public xmlrpc_c::method
         }
     } else {
         Sentence sentence;
-        const vector<FactorType> &inputFactorOrder =
-          staticData.GetInputFactorOrder();
+        const vector<FactorType> &
+	  inputFactorOrder = staticData.GetInputFactorOrder();
         stringstream in(source + "\n");
         sentence.Read(in,inputFactorOrder);
 	size_t lineNumber = 0; // TODO: Include sentence request number here?
         Manager manager(lineNumber, sentence, staticData.GetSearchAlgorithm());
-        manager.ProcessSentence();
+	manager.ProcessSentence();
         const Hypothesis* hypo = manager.GetBestHypothesis();
 
         vector<xmlrpc_c::value> alignInfo;
@@ -331,7 +332,7 @@ class Translator : public xmlrpc_c::method
     pair<string, xmlrpc_c::value>
     text("text", xmlrpc_c::value_string(out.str()));
     retData.insert(text);
-    cerr << "Output: " << out.str() << endl;
+    XVERBOSE(1,"Output: " << out.str() << endl);
     *retvalP = xmlrpc_c::value_struct(retData);
   }
 
@@ -574,7 +575,7 @@ int main(int argc, char** argv)
 {
 
   //Extract port and log, send other args to moses
-  char** mosesargv = new char*[argc+2];
+  char** mosesargv = new char*[argc+2]; // why "+2" [UG]
   int mosesargc = 0;
   int port = 8080;
   const char* logfile = "/dev/null";
@@ -634,11 +635,11 @@ int main(int argc, char** argv)
   myRegistry.addMethod("updater", updater);
   myRegistry.addMethod("optimize", optimizer);
 
-   xmlrpc_c::serverAbyss myAbyssServer(
-					myRegistry,
-					port,              // TCP port on which to listen
-					logfile
-					);
+  xmlrpc_c::serverAbyss myAbyssServer(
+				      myRegistry,
+				      port,              // TCP port on which to listen
+				      logfile
+				      );
   /* doesn't work with xmlrpc-c v. 1.16.33 - ie very old lib on Ubuntu 12.04
   xmlrpc_c::serverAbyss myAbyssServer(
     xmlrpc_c::serverAbyss::constrOpt()
@@ -648,12 +649,10 @@ int main(int argc, char** argv)
     .allowOrigin("*")
   );
   */
-
-  cerr << "Listening on port " << port << endl;
+  
+  XVERBOSE(1,"Listening on port " << port << endl);
   if (isSerial) {
-    while(1) {
-      myAbyssServer.runOnce();
-    }
+    while(1) myAbyssServer.runOnce();
   } else {
     myAbyssServer.run();
   }
diff --git a/moses-cmd/Jamfile b/moses-cmd/Jamfile
index bddc109110..d257cd26cf 100644
--- a/moses-cmd/Jamfile
+++ b/moses-cmd/Jamfile
@@ -3,4 +3,11 @@ alias deps : IOWrapper.cpp mbr.cpp LatticeMBR.cpp TranslationAnalysis.cpp ..//z
 exe moses : Main.cpp deps ;
 exe lmbrgrid : LatticeMBRGrid.cpp deps ;
 
-alias programs : moses lmbrgrid ;
+exe simulate-pe : 
+simulate-pe.cc 
+$(TOP)/moses/TranslationModel/UG/generic//generic 
+$(TOP)//boost_program_options 
+deps 
+;
+
+alias programs : moses lmbrgrid simulate-pe ;
diff --git a/moses/BitmapContainer.cpp b/moses/BitmapContainer.cpp
index 981b04895a..ee2d55fc8b 100644
--- a/moses/BitmapContainer.cpp
+++ b/moses/BitmapContainer.cpp
@@ -161,13 +161,17 @@ BackwardsEdge::BackwardsEdge(const BitmapContainer &prevBitmapContainer
   }
 
   if (m_translations.size() > 1) {
-	UTIL_THROW_IF2(m_translations.Get(0)->GetFutureScore() < m_translations.Get(1)->GetFutureScore(),
-			"Non-monotonic future score");
+    UTIL_THROW_IF2(m_translations.Get(0)->GetFutureScore() < m_translations.Get(1)->GetFutureScore(),
+		   "Non-monotonic future score: " 
+		   << m_translations.Get(0)->GetFutureScore() << " vs. " 
+		   << m_translations.Get(1)->GetFutureScore());
   }
 
   if (m_hypotheses.size() > 1) {
     UTIL_THROW_IF2(m_hypotheses[0]->GetTotalScore() < m_hypotheses[1]->GetTotalScore(),
-			  "Non-monotonic total score");
+		   "Non-monotonic total score: " // label, then "first vs. second"
+		   << m_hypotheses[0]->GetTotalScore() << " vs. "
+		   << m_hypotheses[1]->GetTotalScore());
   }
 
   HypothesisScoreOrdererWithDistortion orderer (&transOptRange);
@@ -442,7 +446,9 @@ BitmapContainer::ProcessBestHypothesis()
   if (!Empty()) {
     HypothesisQueueItem *check = Dequeue(true);
     UTIL_THROW_IF2(item->GetHypothesis()->GetTotalScore() < check->GetHypothesis()->GetTotalScore(),
-    		"Non-monotonic total score");
+		   "Non-monotonic total score: "
+		   << item->GetHypothesis()->GetTotalScore() << " vs. "
+		   << check->GetHypothesis()->GetTotalScore());
   }
 
   // Logging for the criminally insane
diff --git a/moses/Manager.cpp b/moses/Manager.cpp
index 6bc82378ea..196f4d9971 100644
--- a/moses/Manager.cpp
+++ b/moses/Manager.cpp
@@ -105,7 +105,9 @@ void Manager::ProcessSentence()
   // some reporting on how long this took
   IFVERBOSE(1) {
     GetSentenceStats().StopTimeCollectOpts();
-    TRACE_ERR("Line "<< m_lineNumber << ": Collecting options took " << GetSentenceStats().GetTimeCollectOpts() << " seconds" << endl);
+    TRACE_ERR("Line "<< m_lineNumber << ": Collecting options took " 
+	      << GetSentenceStats().GetTimeCollectOpts() << " seconds at " 
+	      << __FILE__ << ":" << __LINE__ << endl);
   }
 
   // search for best translation with the specified algorithm
diff --git a/moses/TranslationModel/UG/Jamfile b/moses/TranslationModel/UG/Jamfile
index ecd175a653..c36d4a072b 100644
--- a/moses/TranslationModel/UG/Jamfile
+++ b/moses/TranslationModel/UG/Jamfile
@@ -20,6 +20,39 @@ $(TOP)/moses/TranslationModel/UG//mmsapt
 $(TOP)/util//kenutil 
 ; 
 
+exe sim-pe : 
+sim-pe.cc 
+$(TOP)/moses//moses
+$(TOP)/moses/TranslationModel/UG/generic//generic 
+$(TOP)//boost_iostreams 
+$(TOP)//boost_program_options 
+$(TOP)/moses/TranslationModel/UG/mm//mm 
+$(TOP)/moses/TranslationModel/UG//mmsapt 
+$(TOP)/util//kenutil 
+; 
+
+exe spe-check-coverage : 
+spe-check-coverage.cc 
+$(TOP)/moses//moses
+$(TOP)/moses/TranslationModel/UG/generic//generic 
+$(TOP)//boost_iostreams 
+$(TOP)//boost_program_options 
+$(TOP)/moses/TranslationModel/UG/mm//mm 
+$(TOP)/moses/TranslationModel/UG//mmsapt 
+$(TOP)/util//kenutil 
+; 
+
+exe spe-check-coverage2 : 
+spe-check-coverage2.cc 
+$(TOP)/moses//moses
+$(TOP)/moses/TranslationModel/UG/generic//generic 
+$(TOP)//boost_iostreams 
+$(TOP)//boost_program_options 
+$(TOP)/moses/TranslationModel/UG/mm//mm 
+$(TOP)/moses/TranslationModel/UG//mmsapt 
+$(TOP)/util//kenutil 
+; 
+
 install $(PREFIX)/bin : try-align ; 
 
-fakelib mmsapt : [ glob *.cpp mmsapt*.cc ] ;
+fakelib mmsapt : [ glob *.cpp mmsapt*.cc sapt*.cc ] ;
diff --git a/moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.cc b/moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.cc
new file mode 100644
index 0000000000..7dc2cd18f0
--- /dev/null
+++ b/moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.cc
@@ -0,0 +1,50 @@
+//-*- c++ -*-
+#include "ug_splice_arglist.h"
+#include "moses/Util.h"
+#include "util/exception.hh"
+#include <boost/foreach.hpp>
+
+namespace Moses {
+  
+  // Split argv_in: options named in /filter/ (plus o.second values each) go
+  // to *argv_other, all other tokens to *argv_moses; nothing is freed here.
+  void 
+  filter_arguments(int const argc_in, char const* const* const argv_in,
+		   int & argc_moses, char*** argv_moses,  
+		   int & argc_other, char*** argv_other,
+		   vector<pair<string,int> > const& filter)
+  {
+    *argv_moses = new char*[argc_in];
+    *argv_other = new char*[argc_in]; 
+    (*argv_moses)[0] = new char[strlen(argv_in[0])+1];
+    strcpy((*argv_moses)[0], argv_in[0]);
+    argc_moses = 1; argc_other = 0;
+    typedef pair<string,int> option;
+    for (int i = 1; i < argc_in; )
+      {
+	option const* hit = 0; // filter entry naming argv_in[i], if any
+	BOOST_FOREACH(option const& o, filter)
+	  if (o.first == argv_in[i]) { hit = &o; break; }
+	if (!hit) // not filtered: the token belongs to Moses
+	  {
+	    (*argv_moses)[argc_moses] = new char[strlen(argv_in[i])+1];
+	    strcpy((*argv_moses)[argc_moses++], argv_in[i++]);
+	    continue;
+	  }
+	(*argv_other)[argc_other] = new char[strlen(argv_in[i])+1];
+	strcpy((*argv_other)[argc_other++],argv_in[i]);
+	for (int k = 0; k < hit->second; ++k)
+	  {
+	    UTIL_THROW_IF2(++i >= argc_in || argv_in[i][0] == '-', 
+			   "[" << HERE << "] Missing argument for "
+			   << "parameter " << hit->first << "!");
+	    (*argv_other)[argc_other] = new char[strlen(argv_in[i])+1];
+	    strcpy((*argv_other)[argc_other++],argv_in[i]);
+	  }
+	++i; // the next token is again matched against the whole filter
+      }
+  }
+  
+} // namespace Moses
+
+
diff --git a/moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.h b/moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.h
new file mode 100644
index 0000000000..e56585e8ab
--- /dev/null
+++ b/moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.h
@@ -0,0 +1,18 @@
+//-*- c++ -*-
+#pragma once
+#include <vector>
+#include <string>
+namespace Moses {
+  using namespace std;
+
+  // Function to splice the argument list (e.g. before handing it over to 
+  // the Moses LoadParam() function). /filter/ is a vector of argument names
+  // and the number of arguments after each of them.
+  void 
+  filter_arguments(int const argc_in, char const* const* const argv_in,
+		   int & argc_moses, char*** argv_moses,  
+		   int & argc_other, char*** argv_other,
+		   vector<pair<string,int> > const& filter);
+
+
+} // namespace Moses
diff --git a/moses/TranslationModel/UG/mm/Jamfile b/moses/TranslationModel/UG/mm/Jamfile
index 2cc923581f..8d8af050a2 100644
--- a/moses/TranslationModel/UG/mm/Jamfile
+++ b/moses/TranslationModel/UG/mm/Jamfile
@@ -72,15 +72,15 @@ $(TOP)/moses/TranslationModel/UG/mm//mm
 $(TOP)/util//kenutil 
 ; 
 
-exe custom-pt : 
-custom-pt.cc 
-$(TOP)/moses//moses
-$(TOP)//boost_iostreams 
-$(TOP)//boost_program_options 
-$(TOP)/moses/TranslationModel/UG/mm//mm 
-$(TOP)/moses/TranslationModel/UG/generic//generic 
-$(TOP)/util//kenutil 
-; 
+# exe custom-pt : 
+# custom-pt.cc 
+# $(TOP)/moses//moses
+# $(TOP)//boost_iostreams 
+# $(TOP)//boost_program_options 
+# $(TOP)/moses/TranslationModel/UG/mm//mm 
+# $(TOP)/moses/TranslationModel/UG/generic//generic 
+# $(TOP)/util//kenutil 
+# ; 
 
 
 exe calc-coverage : 
@@ -98,7 +98,6 @@ mtt-dump
 mtt-count-words 
 symal2mam 
 mam2symal 
-custom-pt 
 mmlex-build 
 mmlex-lookup
 mam_verify 
diff --git a/moses/TranslationModel/UG/mm/custom-pt.cc b/moses/TranslationModel/UG/mm/custom-pt.cc
index 1c1e0893c4..e52772b484 100644
--- a/moses/TranslationModel/UG/mm/custom-pt.cc
+++ b/moses/TranslationModel/UG/mm/custom-pt.cc
@@ -1,6 +1,6 @@
 // build a phrase table for the given input
 // #include "ug_lexical_phrase_scorer2.h"
-
+#if 0
 #include <stdint.h>
 #include <string>
 #include <vector>
@@ -25,7 +25,7 @@
 #include "ug_bitext.h"
 #include "../mmsapt_phrase_scorers.h"
 #include "ug_lexical_phrase_scorer2.h"
-
+#include "../sapt_phrase_scorers.h"
 using namespace std;
 using namespace ugdiss;
 using namespace Moses;
@@ -109,6 +109,7 @@ nbest_phrasepairs(uint64_t const  pid1,
 int main(int argc, char* argv[])
 {
   // assert(argc == 4);
+#if 0
 #if 0
   string base = argv[1];
   string L1   = argv[2];
@@ -182,7 +183,7 @@ int main(int argc, char* argv[])
       	    }
       	}
     }
-  
+#endif  
     exit(0);
 }
-
+#endif
diff --git a/moses/TranslationModel/UG/mm/ug_bitext.cc b/moses/TranslationModel/UG/mm/ug_bitext.cc
index 8dbbdcb926..a1a6dff7bf 100644
--- a/moses/TranslationModel/UG/mm/ug_bitext.cc
+++ b/moses/TranslationModel/UG/mm/ug_bitext.cc
@@ -158,99 +158,25 @@ namespace Moses
     jstats::
     invalidate()
     {
-      my_rcnt = 0;
+      if (my_wcnt > 0) 
+	my_wcnt *= -1;
     }
 
-    bool
+    void 
     jstats::
-    valid()
-    {
-      return my_rcnt != 0;
-    }
-
-    bool
-    PhrasePair::
-    operator<=(PhrasePair const& other) const
+    validate()
     {
-      return this->score <= other.score;
+      if (my_wcnt < 0) 
+	my_wcnt *= -1;
     }
 
     bool
-    PhrasePair::
-    operator>=(PhrasePair const& other) const
-    {
-      return this->score >= other.score;
-    }
-
-    bool
-    PhrasePair::
-    operator<(PhrasePair const& other) const
-    {
-      return this->score < other.score;
-    }
-    
-    bool
-    PhrasePair::
-    operator>(PhrasePair const& other) const
-    {
-      return this->score > other.score;
-    }
-    
-    PhrasePair::
-    PhrasePair() {}
-
-    PhrasePair::
-    PhrasePair(PhrasePair const& o) 
-      : p1(o.p1), 
-	p2(o.p2),
-	raw1(o.raw1), 
-	raw2(o.raw2), 
-	sample1(o.sample1),
-	sample2(o.sample2),
-	good1(o.good1),
-	good2(o.good2),
-	joint(o.joint),
-	fvals(o.fvals),
-	aln(o.aln),
-	score(o.score)
-    {
-      for (size_t i = 0; i <= po_other; ++i)
-	{
-	  dfwd[i] = o.dfwd[i];
-	  dbwd[i] = o.dbwd[i];
-	}
-    }
-    
-    void
-    PhrasePair::
-    init(uint64_t const pid1, pstats const& ps, size_t const numfeats)
+    jstats::
+    valid()
     {
-      p1      = pid1;
-      p2      = 0;
-      raw1    = ps.raw_cnt;
-      sample1 = ps.sample_cnt;
-      sample2 = 0;
-      good1   = ps.good;
-      good2   = 0;
-      raw2    = 0;
-      fvals.resize(numfeats);
+      return my_wcnt >= 0;
     }
 
-    void
-    PhrasePair::
-    init(uint64_t const pid1, 
-	 pstats const& ps1, 
-	 pstats const& ps2, 
-	 size_t const numfeats)
-    {
-      p1      = pid1;
-      raw1    = ps1.raw_cnt    + ps2.raw_cnt;
-      sample1 = ps1.sample_cnt + ps2.sample_cnt;
-      sample2 = 0;
-      good1   = ps1.good       + ps2.good;
-      good2   = 0;
-      fvals.resize(numfeats);
-    }
     
     float 
     lbop(size_t const tries, size_t const succ, float const confidence)
@@ -261,85 +187,6 @@ namespace Moses
 		 find_lower_bound_on_p(tries, succ, confidence)));
     }
     
-    PhrasePair const&
-    PhrasePair::
-    update(uint64_t const pid2, jstats const& js)   
-    {
-      p2    = pid2;
-      raw2  = js.cnt2();
-      joint = js.rcnt();
-      assert(js.aln().size());
-      if (js.aln().size()) 
-	aln = js.aln()[0].second;
-      float total_fwd = 0, total_bwd = 0;
-      for (int i = po_first; i <= po_other; i++)
-	{
-	  PhraseOrientation po = static_cast<PhraseOrientation>(i);
-	  total_fwd += js.dcnt_fwd(po)+1;
-	  total_bwd += js.dcnt_bwd(po)+1;
-	}
-      for (int i = po_first; i <= po_other; i++)
-	{
-	  PhraseOrientation po = static_cast<PhraseOrientation>(i);
-	  dfwd[i] = float(js.dcnt_fwd(po)+1)/total_fwd;
-	  dbwd[i] = float(js.dcnt_bwd(po)+1)/total_bwd;
-	}
-      return *this;
-    }
-
-    PhrasePair const&
-    PhrasePair::
-    update(uint64_t const pid2, jstats const& js1, jstats const& js2)   
-    {
-      p2    = pid2;
-      raw2  = js1.cnt2() + js2.cnt2();
-      joint = js1.rcnt() + js2.rcnt();
-      assert(js1.aln().size() || js2.aln().size());
-      if (js1.aln().size()) 
-	aln = js1.aln()[0].second;
-      else if (js2.aln().size()) 
-	aln = js2.aln()[0].second;
-      for (int i = po_first; i < po_other; i++)
-	{
-	  PhraseOrientation po = static_cast<PhraseOrientation>(i);
-	  dfwd[i] = float(js1.dcnt_fwd(po) + js2.dcnt_fwd(po) + 1)/(sample1+po_other);
-	  dbwd[i] = float(js1.dcnt_bwd(po) + js2.dcnt_bwd(po) + 1)/(sample1+po_other);
-	}
-      return *this;
-    }
-
-    PhrasePair const&
-    PhrasePair::
-    update(uint64_t const pid2, 
-	   size_t   const raw2extra,
-	   jstats   const& js)   
-    {
-      p2    = pid2;
-      raw2  = js.cnt2() + raw2extra;
-      joint = js.rcnt();
-      assert(js.aln().size());
-      if (js.aln().size()) 
-	aln = js.aln()[0].second;
-      for (int i = po_first; i <= po_other; i++)
-	{
-	  PhraseOrientation po = static_cast<PhraseOrientation>(i);
-	  dfwd[i] = float(js.dcnt_fwd(po)+1)/(sample1+po_other);
-	  dbwd[i] = float(js.dcnt_bwd(po)+1)/(sample1+po_other);
-	}
-      return *this;
-    }
-
-    float
-    PhrasePair::
-    eval(vector<float> const& w)
-    {
-      assert(w.size() == this->fvals.size());
-      this->score = 0;
-      for (size_t i = 0; i < w.size(); ++i)
-	this->score += w[i] * this->fvals[i];
-      return this->score;
-    }
-  
     template<>
     sptr<imBitext<L2R_Token<SimpleWordId> > > 
     imBitext<L2R_Token<SimpleWordId> >::
@@ -371,7 +218,8 @@ namespace Moses
 	  uint32_t row,col; char c;
 	  while (ibuf >> row >> c >> col)
 	    {
-	      assert(c == '-');
+	      UTIL_THROW_IF2(c != '-', "[" << HERE << "] "
+			     << "Error in alignment information:\n" << a);
 	      binwrite(obuf,row);
 	      binwrite(obuf,col);
 	    }
@@ -639,7 +487,6 @@ namespace Moses
       cout  << string(90,'-') << endl;
     }
 
-
     PhraseOrientation 
     find_po_fwd(vector<vector<ushort> >& a1,
 		vector<vector<ushort> >& a2,
@@ -654,13 +501,13 @@ namespace Moses
       
       ushort ns1,ne1,ne2;
       if (!expand_phrase_pair(a1,a2,n2,b1,e1,ns1,ne1,ne2))
-	{
-	  return po_other;
-	}
+	return po_other;
+
       if (ns1 >= e1)
 	{
 	  for (ushort j = e1; j < ns1; ++j)
-	    if (a1[j].size()) return po_jfwd;
+	    if (a1[j].size()) 
+	      return po_jfwd;
 	  return po_mono;
 	}
       else
diff --git a/moses/TranslationModel/UG/mm/ug_bitext.h b/moses/TranslationModel/UG/mm/ug_bitext.h
index 3972539737..4cb34c02d9 100644
--- a/moses/TranslationModel/UG/mm/ug_bitext.h
+++ b/moses/TranslationModel/UG/mm/ug_bitext.h
@@ -56,6 +56,7 @@ namespace Moses {
   class Mmsapt;
   namespace bitext
   {
+    template<typename TKN> class Bitext;
     using namespace ugdiss;
 
     template<typename TKN> class Bitext;
@@ -120,6 +121,7 @@ namespace Moses {
       void add(float w, vector<uchar> const& a, uint32_t const cnt2,
 	       uint32_t fwd_orient, uint32_t bwd_orient);
       void invalidate();
+      void validate();
       bool valid();
       uint32_t dcnt_fwd(PhraseOrientation const idx) const;
       uint32_t dcnt_bwd(PhraseOrientation const idx) const;
@@ -157,43 +159,6 @@ namespace Moses {
 	  uint32_t fwd_o, uint32_t bwd_o);
     };
     
-    class 
-    PhrasePair
-    {
-    public:
-      uint64_t p1, p2;
-      uint32_t raw1,raw2,sample1,sample2,good1,good2,joint;
-      vector<float> fvals;
-      float dfwd[po_other+1];
-      float dbwd[po_other+1];
-      vector<uchar> aln;
-      // float    avlex12,avlex21; // average lexical probs (Moses std)
-      // float    znlex1,znlex2;   // zens-ney lexical smoothing
-      // float    colex1,colex2;   // based on raw lexical occurrences
-      float score;
-      PhrasePair();
-      PhrasePair(PhrasePair const& o);
-      bool operator<(PhrasePair const& other) const;
-      bool operator>(PhrasePair const& other) const;
-      bool operator<=(PhrasePair const& other) const;
-      bool operator>=(PhrasePair const& other) const;
-
-      void init(uint64_t const pid1, pstats const& ps,  size_t const numfeats);
-      void init(uint64_t const pid1, pstats const& ps1, pstats const& ps2, 
-		size_t const numfeats);
-
-      PhrasePair const& 
-      update(uint64_t const pid2, jstats const& js);
-
-      PhrasePair const& 
-      update(uint64_t const pid2, jstats   const& js1, jstats   const& js2);
-
-      PhrasePair const& 
-      update(uint64_t const pid2, size_t const raw2extra, jstats const& js);
-
-      float eval(vector<float> const& w);
-    };
-
 
     template<typename TKN>
     class Bitext 
diff --git a/moses/TranslationModel/UG/mm/ug_im_ttrack.h b/moses/TranslationModel/UG/mm/ug_im_ttrack.h
index 05066c922f..0c6e4afbf6 100644
--- a/moses/TranslationModel/UG/mm/ug_im_ttrack.h
+++ b/moses/TranslationModel/UG/mm/ug_im_ttrack.h
@@ -16,6 +16,9 @@
 #include "tpt_tokenindex.h"
 #include "ug_ttrack_base.h"
 #include "tpt_tokenindex.h"
+#include "util/exception.hh"
+#include "moses/Util.h"
+
 // #include "ug_vocab.h"
 
 // define the corpus buffer size (in sentences) and the
@@ -49,6 +52,8 @@ namespace ugdiss
     typename boost::shared_ptr<imTtrack<Token> > 
     append<Token>(typename boost::shared_ptr<imTtrack<Token> > const & crp, vector<Token> const & snt);
 
+    void m_check_token_count(); // debugging function
+
   public:
 
     imTtrack(boost::shared_ptr<vector<vector<Token> > > const& d);
@@ -69,6 +74,22 @@ namespace ugdiss
 
   };
 
+  // Debugging aid: re-counts the tokens of all sentences in *myData and
+  // raises an error via UTIL_THROW_IF2 if the sum differs from numToks.
+  template<typename Token>
+  void imTtrack<Token>::m_check_token_count()
+  {
+    size_t check = 0;
+    for (size_t k = 0; k < myData->size(); ++k)
+      check += (*myData)[k].size();
+    UTIL_THROW_IF2(check != this->numToks, "[" << HERE << "]" 
+		   << " Wrong token count after appending sentence!"
+		   << " Counted " << check << " but expected " 
+		   << this->numToks << " in a total of " << myData->size() 
+		   << " sentences.");
+    
+  }
+
   template<typename Token>
   Token const* 
   imTtrack<Token>::
@@ -111,9 +132,9 @@ namespace ugdiss
   template<typename Token>
   imTtrack<Token>::
   imTtrack(istream& in, TokenIndex const& V, ostream* log = NULL)
+    : numToks(0)
   {
     myData.reset(new vector<vector<Token> >());
-    numToks = 0;
     string line,w;
     size_t linectr=0;
     boost::unordered_map<string,id_type> H;
@@ -135,6 +156,7 @@ namespace ugdiss
   template<typename Token>
   imTtrack<Token>::
   imTtrack(size_t reserve)
+    : numToks(0)
   {
     myData.reset(new vector<vector<Token> >());
     if (reserve) myData->reserve(reserve);
@@ -143,9 +165,9 @@ namespace ugdiss
   template<typename Token>
   imTtrack<Token>::
   imTtrack(boost::shared_ptr<vector<vector<Token> > > const& d)
+    : numToks(0)
   {
     myData  = d;
-    numToks = 0;
     BOOST_FOREACH(vector<Token> const& v, *d)
       numToks += v.size();
   }
@@ -171,6 +193,9 @@ namespace ugdiss
   shared_ptr<imTtrack<TOKEN> > 
   append(shared_ptr<imTtrack<TOKEN> > const& crp, vector<TOKEN> const & snt)
   {
+#if 1
+    if (crp) crp->m_check_token_count();
+#endif
     shared_ptr<imTtrack<TOKEN> > ret;
     if (crp == NULL)
       {
@@ -185,6 +210,11 @@ namespace ugdiss
       }
     else ret = crp;
     ret->myData->push_back(snt);
+    ret->numToks += snt.size();
+
+#if 1
+    ret->m_check_token_count();
+#endif
     return ret;
   }
 
diff --git a/moses/TranslationModel/UG/mm/ug_lexical_phrase_scorer2.h b/moses/TranslationModel/UG/mm/ug_lexical_phrase_scorer2.h
index 558b5a7fa9..b7e3592233 100644
--- a/moses/TranslationModel/UG/mm/ug_lexical_phrase_scorer2.h
+++ b/moses/TranslationModel/UG/mm/ug_lexical_phrase_scorer2.h
@@ -27,7 +27,6 @@ namespace ugdiss
     typedef mm2dTable<id_type,id_type,uint32_t,uint32_t> table_t;
     table_t COOC;
     void open(string const& fname);
-
     template<typename someint>
     void 
     score(TKN const* snt1, size_t const s1, size_t const e1,
@@ -104,7 +103,19 @@ namespace ugdiss
     if (COOC.m1(s) == 0 || COOC.m2(t) == 0) return 1.0;
     UTIL_THROW_IF2(alpha < 0,"At " << __FILE__ << ":" << __LINE__
 		   << ": alpha parameter must be >= 0");
-    return float(COOC[s][t]+alpha)/(COOC.m1(s)+alpha);
+    float ret = COOC[s][t]+alpha;
+    ret =  (ret?ret:1.)/(COOC.m1(s)+alpha);
+    UTIL_THROW_IF2(ret <= 0 || ret > 1, "At " << __FILE__ << ":" << __LINE__ 
+		   << ": result not > 0 and <= 1. alpha = " << alpha << "; "
+		   << COOC[s][t] << "/" << COOC.m1(s));
+
+#if 0
+    cerr << "[" << s << "," << t << "] " 
+	 << COOC.m1(s) << "/" 
+	 << COOC[s][t] << "/" 
+	 << COOC.m2(t) << endl;
+#endif
+    return ret;
   }
   
   template<typename TKN>
@@ -115,7 +126,11 @@ namespace ugdiss
     if (COOC.m1(s) == 0 || COOC.m2(t) == 0) return 1.0;
     UTIL_THROW_IF2(alpha < 0,"At " << __FILE__ << ":" << __LINE__
 		   << ": alpha parameter must be >= 0");
-    return float(COOC[s][t]+alpha)/(COOC.m2(t)+alpha);
+    float ret = float(COOC[s][t]+alpha);
+    ret = (ret?ret:1.)/(COOC.m2(t)+alpha);
+    UTIL_THROW_IF2(ret <= 0 || ret > 1, "At " << __FILE__ << ":" << __LINE__ 
+		   << ": result not > 0 and <= 1.");
+    return ret;
   }
   
   template<typename TKN>
diff --git a/moses/TranslationModel/UG/mm/ug_phrasepair.cc b/moses/TranslationModel/UG/mm/ug_phrasepair.cc
new file mode 100644
index 0000000000..6373f84688
--- /dev/null
+++ b/moses/TranslationModel/UG/mm/ug_phrasepair.cc
@@ -0,0 +1,97 @@
+#include "ug_phrasepair.h"
+namespace Moses {
+  namespace bitext
+  {
+
+#if 0
+    void 
+    PhrasePair::
+    init()
+    {
+      p1 = p2 = raw1 = raw2 = sample1 = sample2 = good1 = good2 = joint = 0;
+    }
+
+    void
+    PhrasePair::
+    init(uint64_t const pid1, 
+	 pstats const& ps1, 
+	 pstats const& ps2, 
+	 size_t const numfeats)
+    {
+      p1      = pid1;
+      raw1    = ps1.raw_cnt    + ps2.raw_cnt;
+      sample1 = ps1.sample_cnt + ps2.sample_cnt;
+      sample2 = 0;
+      good1   = ps1.good       + ps2.good;
+      good2   = 0;
+      joint   = 0;
+      fvals.resize(numfeats);
+    }
+
+    PhrasePair const&
+    PhrasePair::
+    update(uint64_t const pid2, jstats const& js1, jstats const& js2)   
+    {
+      p2    = pid2;
+      raw2  = js1.cnt2() + js2.cnt2();
+      joint = js1.rcnt() + js2.rcnt();
+      assert(js1.aln().size() || js2.aln().size());
+      if (js1.aln().size()) 
+	aln = js1.aln()[0].second;
+      else if (js2.aln().size()) 
+	aln = js2.aln()[0].second;
+      for (int i = po_first; i < po_other; i++)
+	{
+	  PhraseOrientation po = static_cast<PhraseOrientation>(i);
+	  dfwd[i] = float(js1.dcnt_fwd(po) + js2.dcnt_fwd(po) + 1)/(sample1+po_other);
+	  dbwd[i] = float(js1.dcnt_bwd(po) + js2.dcnt_bwd(po) + 1)/(sample1+po_other);
+	}
+      return *this;
+    }
+
+    PhrasePair const&
+    PhrasePair::
+    update(uint64_t const pid2, size_t r2)
+    {
+      p2    = pid2;
+      raw2  = r2;
+      joint = 0;
+      return *this;
+    } 
+
+
+    PhrasePair const&
+    PhrasePair::
+    update(uint64_t const pid2, 
+	   size_t   const raw2extra,
+	   jstats   const& js)   
+    {
+      p2    = pid2;
+      raw2  = js.cnt2() + raw2extra;
+      joint = js.rcnt();
+      assert(js.aln().size());
+      if (js.aln().size()) 
+	aln = js.aln()[0].second;
+      for (int i = po_first; i <= po_other; i++)
+	{
+	  PhraseOrientation po = static_cast<PhraseOrientation>(i);
+	  dfwd[i] = float(js.dcnt_fwd(po)+1)/(sample1+po_other);
+	  dbwd[i] = float(js.dcnt_bwd(po)+1)/(sample1+po_other);
+	}
+      return *this;
+    }
+
+    float
+    PhrasePair::
+    eval(vector<float> const& w)
+    {
+      assert(w.size() == this->fvals.size());
+      this->score = 0;
+      for (size_t i = 0; i < w.size(); ++i)
+	this->score += w[i] * this->fvals[i];
+      return this->score;
+    }
+#endif
+  } // namespace bitext
+} // namespace Moses
+
diff --git a/moses/TranslationModel/UG/mm/ug_phrasepair.h b/moses/TranslationModel/UG/mm/ug_phrasepair.h
new file mode 100644
index 0000000000..8cd43dc187
--- /dev/null
+++ b/moses/TranslationModel/UG/mm/ug_phrasepair.h
@@ -0,0 +1,243 @@
+//-*- c++ -*-
+#pragma once
+#include "ug_bitext.h"
+
+using namespace ugdiss;
+using namespace std;
+
+namespace Moses {
+  namespace bitext
+  {
+
+    // Space-separated V entries of the /len/ tokens from /x/; throws if the chain is shorter.
+    template<typename Token>
+    string toString(TokenIndex const& V, Token const* x, size_t const len)
+    {
+      if (len == 0) return "";
+      UTIL_THROW_IF2(!x, HERE << ": Unexpected end of phrase!");
+      ostringstream out; 
+      out << V[x->id()];
+      size_t done = 1;
+      for (x = x->next(); x && done < len; x = x->next(), ++done)
+        out << " " << V[x->id()];
+      UTIL_THROW_IF2(done != len, HERE << ": Unexpected end of phrase!");
+      return out.str();
+    }
+
+    // A pair of phrases (1 and 2) together with occurrence counts, feature
+    // values, orientation statistics (dfwd/dbwd), an alignment and a score.
+    template<typename Token>
+    class PhrasePair
+    {
+    public:
+      Token const* start1;
+      Token const* start2;
+      uint32_t len1;
+      uint32_t len2;
+      // uint64_t p1, p2;
+      uint32_t raw1,raw2,sample1,sample2,good1,good2,joint;
+      vector<float> fvals;
+      float dfwd[po_other+1]; // distortion counts // counts or probs?
+      float dbwd[po_other+1]; // distortion counts
+      vector<uchar> aln;
+      float score;
+      // No member may stay indeterminate: the copy constructor reads them all.
+      PhrasePair()
+        : start1(NULL), start2(NULL), len1(0), len2(0), raw1(0), raw2(0)
+        , sample1(0), sample2(0), good1(0), good2(0), joint(0), score(0)
+      { for (int i = 0; i <= po_other; ++i) dfwd[i] = dbwd[i] = 0; }
+      PhrasePair(PhrasePair const& o);
+
+      // adds the seven occurrence counts of /other/; nothing else changes
+      PhrasePair const& operator+=(PhrasePair const& other);
+
+      // all four comparisons look at score only
+      bool operator<(PhrasePair const& other) const;
+      bool operator>(PhrasePair const& other) const;
+      bool operator<=(PhrasePair const& other) const; 
+      bool operator>=(PhrasePair const& other) const;
+
+      void init();
+      void init(Token const* x,   uint32_t const len,
+                pstats const* ps = NULL, size_t const numfeats=0);
+      // void init(uint64_t const pid1, pstats const& ps,  size_t const numfeats);
+      // void init(uint64_t const pid1, pstats const& ps1, pstats const& ps2, 
+      // size_t const numfeats);
+      // PhrasePair const&
+      // update(uint64_t const pid2, size_t r2 = 0);
+      PhrasePair const& update(Token const* x, uint32_t const len, jstats const& js);
+      // PhrasePair const& 
+      // update(uint64_t const pid2, jstats   const& js1, jstats   const& js2);
+      // PhrasePair const& 
+      // update(uint64_t const pid2, size_t const raw2extra, jstats const& js);
+      // float 
+      // eval(vector<float> const& w);
+      // orders pairs by the token id sequence of phrase 2
+      class SortByTargetIdSeq
+      {
+      public:
+        int cmp(PhrasePair const& a, PhrasePair const& b) const;
+        bool operator()(PhrasePair const& a, PhrasePair const& b) const;
+      };
+    };
+
+    // Anchor phrase 1 at /x/ (/len/ tokens) and take its raw/sample/good
+    // counts from /ps/ (zeros if /ps/ is NULL); zero the phrase-2 and joint
+    // counts and resize fvals to /numfeats/. start2, len2, aln, score, dfwd
+    // and dbwd are not touched.
+    template<typename Token>
+    void
+    PhrasePair<Token>::
+    init(Token const* x, uint32_t const len, 
+         pstats const* ps, size_t const numfeats)
+    {
+      start1 = x; 
+      len1   = len;
+      // p1      = pid1;
+      // p2      = 0;
+      raw1    = ps ? ps->raw_cnt    : 0;
+      sample1 = ps ? ps->sample_cnt : 0;
+      good1   = ps ? ps->good       : 0;
+      // the phrase-2 side and the joint count start from zero
+      raw2 = sample2 = 0;
+      good2 = joint = 0;
+      fvals.resize(numfeats);
+    }
+
+    // Attach phrase 2 (/len/ tokens from /x/) and fill raw2, joint, aln and
+    // the add-one smoothed, normalised orientation values from /js/.
+    template<typename Token>
+    PhrasePair<Token> const&
+    PhrasePair<Token>::
+    update(Token const* x, uint32_t const len, jstats const& js)   
+    {
+      // p2    = pid2;
+      start2 = x; len2 = len;
+      raw2   = js.cnt2();
+      joint  = js.rcnt();
+      assert(js.aln().size());
+      if (js.aln().size()) aln = js.aln()[0].second;
+      // pass 1: add-one smoothed totals over all orientation classes
+      float fwd_sum = 0, bwd_sum = 0;
+      for (int k = po_first; k <= po_other; ++k)
+        {
+          PhraseOrientation const o = static_cast<PhraseOrientation>(k);
+          fwd_sum += js.dcnt_fwd(o)+1;
+          bwd_sum += js.dcnt_bwd(o)+1;
+        }
+      // pass 2: normalise -- should we do that here or leave the raw counts?
+      for (int k = po_first; k <= po_other; ++k)
+        {
+          PhraseOrientation const o = static_cast<PhraseOrientation>(k);
+          dfwd[k] = float(js.dcnt_fwd(o)+1)/fwd_sum;
+          dbwd[k] = float(js.dcnt_bwd(o)+1)/bwd_sum;
+        }
+      return *this;
+    }
+
+    // true iff this pair's score is strictly below /other/'s
+    template<typename Token>
+    bool 
+    PhrasePair<Token>::operator<(PhrasePair const& other) const 
+    { return other.score > score; }
+    
+    // true iff this pair's score is strictly above /other/'s
+    template<typename Token>
+    bool 
+    PhrasePair<Token>::operator>(PhrasePair const& other) const
+    { return other.score < score; }
+
+    // true iff this pair's score does not exceed /other/'s
+    template<typename Token>
+    bool 
+    PhrasePair<Token>::operator<=(PhrasePair const& other) const 
+    { return other.score >= score; }
+    
+    // true iff this pair's score is at least /other/'s
+    template<typename Token>
+    bool 
+    PhrasePair<Token>::operator>=(PhrasePair const& other) const
+    { return other.score <= score; }
+
+    // Pool counts: adds /o/'s seven occurrence counts; all other members stay as is.
+    template<typename Token>
+    PhrasePair<Token> const&
+    PhrasePair<Token>::operator+=(PhrasePair const& o) 
+    { 
+      raw1    += o.raw1;
+      sample1 += o.sample1;
+      good1   += o.good1;
+      raw2    += o.raw2;
+      sample2 += o.sample2;
+      good2   += o.good2;
+      joint   += o.joint;
+      return *this;
+    }
+
+    // Copy constructor: a plain member-wise copy. The scalar and vector
+    // members go through the initializer list (in declaration order); the
+    // orientation arrays dfwd/dbwd are copied element by element in the body.
+    template<typename Token>
+    PhrasePair<Token>::
+    PhrasePair(PhrasePair<Token> const& o) 
+      : start1(o.start1)
+      , start2(o.start2)
+      , len1(o.len1),       len2(o.len2)
+      , raw1(o.raw1),       raw2(o.raw2) 
+      , sample1(o.sample1), sample2(o.sample2)
+      , good1(o.good1),     good2(o.good2)
+      , joint(o.joint)
+      , fvals(o.fvals)
+      , aln(o.aln)
+      , score(o.score)
+    {
+      size_t const n = po_other + 1;
+      for (size_t k = 0; k < n; ++k)
+        {
+          dfwd[k] = o.dfwd[k];
+          dbwd[k] = o.dbwd[k];
+        }
+    }
+    
+    // Three-way lexicographic comparison of the phrase-2 token id sequences:
+    // returns -1 / 0 / 1; a proper prefix sorts before the longer sequence.
+    template<typename Token>
+    int
+    PhrasePair<Token>::
+    SortByTargetIdSeq::
+    cmp(PhrasePair const& a, PhrasePair const& b) const
+    {
+      Token const* ta = a.start2;
+      Token const* tb = b.start2;
+      size_t pos = 0;
+      // skip the common prefix
+      for (; pos < a.len2 && pos < b.len2; ++pos, ta = ta->next(), tb = tb->next())
+        if (ta->id() != tb->id()) break;
+      bool const a_done = (pos == a.len2);
+      bool const b_done = (pos == b.len2);
+      if (a_done && b_done) return 0;
+      if (a_done || b_done) return a_done ? -1 : 1;
+      return ta->id() < tb->id() ? -1 : 1;
+    }
+    
+    // less-than predicate for sorting: true iff cmp() puts /a/ before /b/
+    template<typename Token>
+    bool
+    PhrasePair<Token>::SortByTargetIdSeq::
+    operator()(PhrasePair const& a, PhrasePair const& b) const
+    {
+      return 0 > cmp(a,b);
+    }
+
+    // Zero all occurrence counts and both phrase lengths and null both phrase
+    // pointers; fvals, aln, score and dfwd/dbwd are left as they are.
+    template<typename Token> void PhrasePair<Token>::init()
+    {
+      start1 = start2 = NULL;
+      len1 = len2 = 0;
+      raw1 = raw2 = sample1 = sample2 = good1 = good2 = joint = 0;
+    }
+
+
+  } // namespace bitext
+} // namespace Moses
diff --git a/moses/TranslationModel/UG/mm/ug_tsa_tree_iterator.h b/moses/TranslationModel/UG/mm/ug_tsa_tree_iterator.h
index 14bf6cdadb..ab7f96bf0b 100644
--- a/moses/TranslationModel/UG/mm/ug_tsa_tree_iterator.h
+++ b/moses/TranslationModel/UG/mm/ug_tsa_tree_iterator.h
@@ -7,6 +7,8 @@
 #include "ug_typedefs.h"
 #include "tpt_tokenindex.h"
 #include <iostream>
+#include "util/exception.hh"
+#include "moses/Util.h"
 //#include <cassert>
 
 // #include "ug_bv_iter.h"
@@ -60,8 +62,13 @@ namespace ugdiss
 
     // TSA_tree_iterator(TSA_tree_iterator const& other);
     TSA_tree_iterator(TSA<Token> const* s);
+    TSA_tree_iterator(TSA<Token> const* s, TSA_tree_iterator<Token> const& other);
     TSA_tree_iterator(TSA<Token> const* r, id_type const* s, size_t const len);
     // TSA_tree_iterator(TSA<Token> const* s, Token const& t);
+    TSA_tree_iterator(TSA<Token> const* s, 
+		      Token const* kstart, 
+		      size_t const len, 
+		      bool full_match_only=true);
     TSA_tree_iterator(TSA<Token> const* s, 
 		      Token const* kstart, 
 		      Token const* kend, 
@@ -150,9 +157,12 @@ namespace ugdiss
     double approxOccurrenceCount(int p=-1) const
     {
       assert(root);
+      if (p < 0) p += lower.size();
       double ret = arrayByteSpanSize(p)/root->aveIndexEntrySize();
-      assert(ret < root->corpus->numTokens());
       if (ret < 25) ret = rawCnt(p);
+      UTIL_THROW_IF2(ret > root->corpus->numTokens(), "[" << HERE << "] "
+		     << "Word count mismatch.");
+      assert(ret <= root->corpus->numTokens());
       return ret;
     }
 
@@ -318,6 +328,18 @@ namespace ugdiss
     : root(s) 
   {};
 
+  // Build an iterator over /s/ by replaying the token sequence that /other/
+  // currently spans, stopping early where /s/ cannot extend the match.
+  template<typename Token>
+  TSA_tree_iterator<Token>::
+  TSA_tree_iterator(TSA<Token> const* s, TSA_tree_iterator<Token> const& other)
+    : root(s) 
+  {
+    Token const* x = other.size() ? other.getToken(0) : NULL;
+    for (size_t i = 0; x && i < other.size() && this->extend(x->id()); ++i)
+      x = x->next(); 
+  };
+
   template<typename Token>
   TSA_tree_iterator<Token>::
   TSA_tree_iterator
@@ -382,6 +404,25 @@ namespace ugdiss
 
 #endif
 
+  // Look up the /len/ tokens starting at /kstart/ in index /s/. A partial
+  // match is discarded (lower/upper cleared) when /full_match_only/ is set.
+  template<typename Token>
+  TSA_tree_iterator<Token>::
+  TSA_tree_iterator(TSA<Token> const* s, Token const* kstart, 
+                    size_t const len, bool full_match_only)
+    : root(s) 
+  {
+    if (!root) return;
+    size_t matched = 0;
+    while (matched < len && kstart && extend(*kstart))
+      { kstart = kstart->next(); ++matched; }
+    bool const complete = (matched == len);
+    if (full_match_only && !complete) 
+      { lower.clear(); upper.clear(); }
+  };
+
+  // DEPRECATED: DO NOT USE. Use the one that takes the length 
+  // instead of kend.
   template<typename Token>
   TSA_tree_iterator<Token>::
   TSA_tree_iterator(TSA<Token> const* s, Token const* kstart, 
@@ -561,8 +602,7 @@ namespace ugdiss
   TSA_tree_iterator<Token>::
   rawCnt(int p) const
   {
-    if (p < 0)
-      p = lower.size()+p;
+    if (p < 0) p += lower.size();
     assert(p>=0);
     if (lower.size() == 0) return root->getCorpusSize();
     return root->rawCnt(lower[p],upper[p]);
diff --git a/moses/TranslationModel/UG/mmsapt.cpp b/moses/TranslationModel/UG/mmsapt.cpp
index dc99454728..596fec4e6c 100644
--- a/moses/TranslationModel/UG/mmsapt.cpp
+++ b/moses/TranslationModel/UG/mmsapt.cpp
@@ -1,13 +1,38 @@
 #include "mmsapt.h"
 #include <boost/foreach.hpp>
+#include <boost/scoped_ptr.hpp>
 #include <boost/tokenizer.hpp>
 #include <algorithm>
+#include "moses/TranslationModel/UG/mm/ug_phrasepair.h"
+#include "util/exception.hh"
+#include <set>
 
 namespace Moses
 {
   using namespace bitext;
   using namespace std;
   using namespace boost;
+
+
+  // uint64_t 
+  // pack_phrasekey(uint64_t const shard_id, uint64_t const snt_id, 
+  // 		 uint64_t const offset, uint64_t const len)
+  // {
+  //   uint64_t one = 1;
+  //   //  8 bits - 256 shards
+  //   // 13 bits - max offset
+  //   // 11 bits - max len
+  //   // 32 bits - max sentence id
+  //   UTIL_THROW_IF2(shard_id >= 256, "[" << HERE << "] " 
+  //                  << "Shard ID exceeds limit.");
+  //   UTIL_THROW_IF2(snt_id >= 4294967296, "[" << HERE << "] " 
+  //                  << "Sentence ID exceeds limit.");
+  //   UTIL_THROW_IF2(offset >= 8192, "[" << HERE << "] " 
+  //                  << "Phrase offset exceeds limit.");
+  //   UTIL_THROW_IF2(len >= 2048, "[" << HERE << "] " 
+  //                  << "Phrase length exceeds limit.");
+  //   return ((shard_id<<56)+(snt_id<<24)+(offset<<11)+len);
+  // }
   
   void 
   fillIdSeq(Phrase const& mophrase, size_t const ifactor,
@@ -23,7 +48,7 @@ namespace Moses
     
 
   void 
-  parseLine(string const& line, map<string,string> & params)
+  parseLine(string const& line, map<string,string> & param)
   {
     char_separator<char> sep("; ");
     tokenizer<char_separator<char> > tokens(line,sep);
@@ -32,9 +57,14 @@ namespace Moses
 	size_t i = t.find_first_not_of(" =");
 	size_t j = t.find_first_of(" =",i+1);
 	size_t k = t.find_first_not_of(" =",j+1);
+	UTIL_THROW_IF2(i == string::npos || k == string::npos,
+		       "[" << HERE << "] "
+		       << "Parameter specification error near '"
+		       << t << "' in moses ini line\n"
+		      << line);
 	assert(i != string::npos);
 	assert(k != string::npos);
-	params[t.substr(i,j)] = t.substr(k);
+	param[t.substr(i,j)] = t.substr(k);
       }
   }
 
@@ -57,13 +87,13 @@ namespace Moses
   Mmsapt::
   Mmsapt(string const& line)
     : PhraseDictionary(line)
-    , m_lex_alpha(1.0)
-    , withLogCountFeatures(false)
-    , withCoherence(true)
-    , m_pfwd_features("g")
-    , m_pbwd_features("g")
-    , withPbwd(true)
-    , poolCounts(true)
+      // , m_lex_alpha(1.0)
+      // , withLogCountFeatures(false)
+      // , withCoherence(true)
+      // , m_pfwd_features("g")
+      // , m_pbwd_features("g")
+      // , withPbwd(true)
+      // , poolCounts(true)
     , ofactor(1,0)
     , m_tpc_ctr(0)
   {
@@ -92,83 +122,127 @@ namespace Moses
       }
   }
 
+  // Append /ff/ to /registry/, set its index to the number of feature names
+  // recorded so far, then record name and value-type flags per feature.
+  void Mmsapt::
+  register_ff(sptr<pscorer> const& ff, vector<sptr<pscorer> > & registry)
+  {
+    registry.push_back(ff);
+    ff->setIndex(m_feature_names.size());
+    for (int f = 0; f < ff->fcnt(); ++f) {
+      m_feature_names.push_back(ff->fname(f));
+      m_is_logval.push_back(ff->isLogVal(f));
+      m_is_integer.push_back(ff->isIntegerValued(f));
+    }
+  }
+
+  // the flag register_ff() stored from isLogVal() for feature /i/
+  bool Mmsapt::
+  isLogVal(int i) const { return m_is_logval.at(i); }
+
+  // the flag register_ff() stored from isIntegerValued() for feature /i/
+  bool Mmsapt::
+  isInteger(int i) const { return m_is_integer.at(i); }
+
   void
   Mmsapt::
   init(string const& line)
   {
     map<string,string>::const_iterator m;
-    map<string,string> param;
-    parseLine(line,param);
+    parseLine(line,this->param);
+
+    this->m_numScoreComponents = atoi(param["num-features"].c_str());
     
     m = param.find("config");
     if (m != param.end())
       read_config_file(m->second,param);
-    
-    bname = param["base"];
+
+    bname = param["base"]; 
     L1    = param["L1"];
     L2    = param["L2"];
-    assert(bname.size());
-    assert(L1.size());
-    assert(L2.size());
-
-    m = param.find("pfwd-denom");
-    m_pfwd_denom = m != param.end() ? m->second[0] : 's';
-    
-    m = param.find("smooth");
-    m_lbop_parameter = m != param.end() ? atof(m->second.c_str()) : .05;
 
-    m = param.find("max-samples");
-    m_default_sample_size = m != param.end() ? atoi(m->second.c_str()) : 1000;
+    UTIL_THROW_IF2(bname.size() == 0, "Missing corpus base name at " << HERE);
+    UTIL_THROW_IF2(L1.size() == 0, "Missing L1 tag at " << HERE);
+    UTIL_THROW_IF2(L2.size() == 0, "Missing L2 tag at " << HERE);
 
-    if ((m = param.find("logcnt-features")) != param.end())
-      withLogCountFeatures = m->second != "0";
-
-    if ((m = param.find("coh")) != param.end())
-      withCoherence = m->second != "0";
-    
-    if ((m = param.find("pfwd")) != param.end())
-      m_pfwd_features = (m->second == "0" ? "" : m->second);
-
-    if (m_pfwd_features == "1") // legacy; deprecated
-      m_pfwd_features[0] = m_pfwd_denom;
+    // set defaults for all parameters if not specified so far
+    pair<string,string> dflt("input-factor","0");
+    input_factor = atoi(param.insert(dflt).first->second.c_str());
+    // shouldn't that be a string?
     
-    if ((m = param.find("pbwd")) != param.end())
-      m_pbwd_features = (m->second == "0" ? "" : m->second);
+    dflt = pair<string,string> ("smooth",".01");
+    m_lbop_conf = atof(param.insert(dflt).first->second.c_str());
 
-    if (m_pbwd_features == "1") 
-      m_pbwd_features = "r"; // lecagy; deprecated
+    dflt = pair<string,string> ("lexalpha","0");
+    m_lex_alpha = atof(param.insert(dflt).first->second.c_str());
 
-    if ((m = param.find("lexalpha")) != param.end())
-      m_lex_alpha = atof(m->second.c_str());
+    dflt = pair<string,string> ("sample","1000");
+    m_default_sample_size = atoi(param.insert(dflt).first->second.c_str());
 
-    m = param.find("workers");
-    m_workers = m != param.end() ? atoi(m->second.c_str()) : 8;
+    dflt = pair<string,string>("workers","8");
+    m_workers = atoi(param.insert(dflt).first->second.c_str());
     m_workers = min(m_workers,24UL);
 
-    if ((m = param.find("limit")) != param.end()) 
-      m_tableLimit = atoi(m->second.c_str());
+    dflt = pair<string,string>("limit","20");
+    m_tableLimit = atoi(param.insert(dflt).first->second.c_str());
 
-    m = param.find("cache-size");
-    m_history.reserve(m != param.end()?max(1000,atoi(m->second.c_str())):10000);
+    dflt = pair<string,string>("cache","10000");
+    size_t hsize = max(1000,atoi(param.insert(dflt).first->second.c_str()));
+    m_history.reserve(hsize);
     // in plain language: cache size is at least 1000, and 10,000 by default
     // this cache keeps track of the most frequently used target phrase collections
     // even when not actively in use
-    
-    this->m_numScoreComponents = atoi(param["num-features"].c_str());
 
-    m = param.find("ifactor");
-    input_factor = m != param.end() ? atoi(m->second.c_str()) : 0;
+    // Feature functions are initialized  in function Load();
+    param.insert(pair<string,string>("pfwd",   "g"));  
+    param.insert(pair<string,string>("pbwd",   "g"));  
+    param.insert(pair<string,string>("logcnt", "0")); 
+    param.insert(pair<string,string>("coh",    "0")); 
+    param.insert(pair<string,string>("rare",   "1")); 
+    param.insert(pair<string,string>("prov",   "1")); 
     
     poolCounts = true;
     
     if ((m = param.find("extra")) != param.end()) 
       extra_data = m->second;
 
+    // check for unknown parameters (list must stay sorted for binary_search)
+    vector<string> known_parameters; known_parameters.reserve(50);
+    known_parameters.push_back("L1");
+    known_parameters.push_back("L2");
+    known_parameters.push_back("Mmsapt");
+    known_parameters.push_back("base");
+    known_parameters.push_back("cache");
+    known_parameters.push_back("coh");
+    known_parameters.push_back("config");
+    known_parameters.push_back("extra");
+    known_parameters.push_back("input-factor");
+    known_parameters.push_back("lexalpha");
+    known_parameters.push_back("limit");
+    known_parameters.push_back("logcnt");
+    known_parameters.push_back("name");
+    known_parameters.push_back("num-features");
+    known_parameters.push_back("output-factor");
+    known_parameters.push_back("pbwd");
+    known_parameters.push_back("pfwd");
+    known_parameters.push_back("prov");
+    known_parameters.push_back("rare");
+    known_parameters.push_back("sample");
+    known_parameters.push_back("smooth");
+    known_parameters.push_back("unal");
+    known_parameters.push_back("workers");
+    for (map<string,string>::iterator m = param.begin(); m != param.end(); ++m)
+      {
+	UTIL_THROW_IF2(!binary_search(known_parameters.begin(),
+				      known_parameters.end(), m->first),
+		       HERE << ": Unknown parameter specification for Mmsapt: " 
+		       << m->first);
+      }
   }
 
   void
   Mmsapt::
-  load_extra_data(string bname)
+  load_extra_data(string bname, bool locking = true)
   {
     // TO DO: ADD CHECKS FOR ROBUSTNESS
     // - file existence?
@@ -186,122 +260,120 @@ namespace Moses
     while(getline(in2,line)) text2.push_back(line);
     while(getline(ina,line)) symal.push_back(line);
 
-    lock_guard<mutex> guard(this->lock);
+    boost::scoped_ptr<lock_guard<mutex> > guard;
+    if (locking) guard.reset(new lock_guard<mutex>(this->lock));
     btdyn = btdyn->add(text1,text2,symal);
     assert(btdyn);
     // cerr << "Loaded " << btdyn->T1->size() << " sentence pairs" << endl;
   }
 
-  size_t
+  template<typename fftype>
+  void
   Mmsapt::
-  add_corpus_specific_features
-  (vector<sptr<pscorer > >& ffvec, size_t num_feats)
+  check_ff(string const ffname, vector<sptr<pscorer> >* registry)
   {
-    float const lbop = m_lbop_parameter; // just for code readability below
-    // for the time being, we assume that all phrase probability features 
-    // use the same confidence parameter for lower-bound-estimation
-    for (size_t i = 0; i < m_pfwd_features.size(); ++i) 
-      {	
-	UTIL_THROW_IF2(m_pfwd_features[i] != 'g' &&
-		       m_pfwd_features[i] != 'r' &&
-		       m_pfwd_features[i] != 's',
-		       "Can't handle pfwd feature type '" 
-		       << m_pfwd_features[i] << "'.");
-	sptr<PScorePfwd<Token> > ff(new PScorePfwd<Token>());
-	size_t k = num_feats;
-	num_feats = ff->init(num_feats,lbop,m_pfwd_features[i]);
-	for (;k < num_feats; ++k) m_feature_names.push_back(ff->fname(k));
-	ffvec.push_back(ff);
+    string const& spec = param[ffname];
+    if (spec == "" || spec == "0") return;
+    if (registry)
+      {
+	sptr<fftype> ff(new fftype(spec));
+	register_ff(ff, *registry);
       }
-    
-    for (size_t i = 0; i < m_pbwd_features.size(); ++i) 
-      {	
-	UTIL_THROW_IF2(m_pbwd_features[i] != 'g' &&
-		       m_pbwd_features[i] != 'r' &&
-		       m_pbwd_features[i] != 's',
-		       "Can't handle pbwd feature type '" 
-		       << m_pbwd_features[i] << "'.");
-	sptr<PScorePbwd<Token> > ff(new PScorePbwd<Token>());
-	size_t k = num_feats;
-	num_feats = ff->init(num_feats,lbop,m_pbwd_features[i]);
-	for (;k < num_feats; ++k) m_feature_names.push_back(ff->fname(k));
-	ffvec.push_back(ff);
+    else if (spec[spec.size()-1] == '+') // corpus specific
+      {
+	sptr<fftype> ff(new fftype(spec));
+	register_ff(ff, m_active_ff_fix);
+	ff.reset(new fftype(spec));
+	register_ff(ff, m_active_ff_dyn);
       }
-
-    // if (withPbwd) 
-    //   {
-    // 	sptr<PScorePbwd<Token> > ff(new PScorePbwd<Token>());
-    // 	size_t k = num_feats;
-    // 	num_feats = ff->init(num_feats,lbop);
-    // 	for (; k < num_feats; ++k) m_feature_names.push_back(ff->fname(k));
-    // 	ffvec.push_back(ff);
-    //   }
-    
-    if (withLogCountFeatures) 
+    else 
       {
-	sptr<PScoreLogCounts<Token> > ff(new PScoreLogCounts<Token>());
-	size_t k = num_feats;
-	num_feats = ff->init(num_feats);
-	for (; k < num_feats; ++k) m_feature_names.push_back(ff->fname(k));
-	ffvec.push_back(ff);
+	sptr<fftype> ff(new fftype(spec));
+	register_ff(ff, m_active_ff_common);
       }
+  }
 
-    return num_feats;
+  // Instantiate feature function /fftype/ from the spec stored under
+  // param[ffname] (extra ctor argument: /xtra/). Nothing happens if the spec
+  // is "" or "0". With an explicit /registry/ the feature goes there;
+  // otherwise a spec ending in '+' (corpus specific) yields one instance
+  // each for the fix and dyn registries, anything else a common one.
+  template<typename fftype>
+  void
+  Mmsapt::
+  check_ff(string const ffname, float const xtra, vector<sptr<pscorer> >* registry)
+  {
+    string const& spec = param[ffname];
+    if (spec.empty() || spec == "0") return;
+    sptr<fftype> ff(new fftype(xtra,spec));
+    if (registry) 
+      register_ff(ff, *registry);
+    else if (spec[spec.size()-1] != '+')
+      register_ff(ff, m_active_ff_common);
+    else // corpus specific
+      {
+        // one separate instance per corpus-specific registry
+        register_ff(ff, m_active_ff_fix);
+        ff.reset(new fftype(xtra,spec));
+        register_ff(ff, m_active_ff_dyn);
+      }
   }
 
+  // void
+  // Mmsapt::
+  // add_corpus_specific_features(vector<sptr<pscorer > >& registry)
+  // {
+  //   check_ff<PScorePbwd<Token> >("pbwd",m_lbop_conf,registry);
+  //   check_ff<PScoreLogCnt<Token> >("logcnt",registry);
+  // }
+
   void
   Mmsapt::
   Load()
   {
+    lock_guard<mutex> guard(this->lock);
+    // Registers the feature functions first, then opens the corpora.
+    // can load only once
+    // UTIL_THROW_IF2(shards.size(),"Mmsapt is already loaded at " << HERE);
+
+    // lexical scores; init() stores the smoothing value under "lexalpha"
+    string lexfile = bname + L1 + "-" + L2 + ".lex";
+    sptr<PScoreLex1<Token> > ff(new PScoreLex1<Token>(param["lexalpha"],lexfile));
+    register_ff(ff,m_active_ff_common);
+
+    // these are always computed on pooled data
+    check_ff<PScoreRareness<Token> > ("rare", &m_active_ff_common);
+    check_ff<PScoreUnaligned<Token> >("unal", &m_active_ff_common);
+    check_ff<PScoreCoherence<Token> >("coh",  &m_active_ff_common);
+    
+    // for these, either way is possible (the specification ends with '+'
+    // if the feature is corpus-specific)
+    check_ff<PScorePfwd<Token> >("pfwd", m_lbop_conf);
+    check_ff<PScorePbwd<Token> >("pbwd", m_lbop_conf);
+    check_ff<PScoreLogCnt<Token> >("logcnt");
+
+    // These are always corpus-specific
+    check_ff<PScoreProvenance<Token> >("prov", &m_active_ff_fix);
+    check_ff<PScoreProvenance<Token> >("prov", &m_active_ff_dyn);
+
+    UTIL_THROW_IF2(this->m_feature_names.size() != this->m_numScoreComponents,
+		   "At " << HERE << ": number of feature values provided by "
+		   << "Phrase table (" << this->m_feature_names.size()
+		   << ") does not match number specified in Moses config file ("
+		   << this->m_numScoreComponents << ")!\n";);
+
+    // Load corpora. For the time being, we can have one memory-mapped static
+    // corpus and one in-memory dynamic corpus
+    // sptr<mmbitext> btfix(new mmbitext());
     btfix.num_workers = this->m_workers;
     btfix.open(bname, L1, L2);
     btfix.setDefaultSampleSize(m_default_sample_size);
+    // shards.push_back(btfix);
     
-    size_t num_feats = 0;
-    
-    // lexical scores are currently always active 
-    sptr<PScoreLex<Token> > ff(new PScoreLex<Token>(m_lex_alpha));
-    size_t k = num_feats;
-    num_feats = ff->init(num_feats, bname + L1 + "-" + L2 + ".lex");
-    for (; k < num_feats; ++k) m_feature_names.push_back(ff->fname(k));
-    m_active_ff_common.push_back(ff);
-    
-    if (withCoherence)
-      {
-	sptr<PScoreCoherence<Token> > ff(new PScoreCoherence<Token>());
-	size_t k = num_feats;
-	num_feats = ff->init(num_feats);
-	for (; k < num_feats; ++k) m_feature_names.push_back(ff->fname(k));
-	m_active_ff_common.push_back(ff);
-      }
-
-    num_feats = add_corpus_specific_features(m_active_ff_fix,num_feats);
-    // cerr << num_feats << "/" << this->m_numScoreComponents 
-    // << " at " << __FILE__ << ":" << __LINE__ << endl;
-    poolCounts = poolCounts && num_feats == this->m_numScoreComponents;
-    if (!poolCounts)
-      num_feats = add_corpus_specific_features(m_active_ff_dyn, num_feats);
-    
-#if 0
-    cerr << "MMSAPT provides " << num_feats << " features at " 
-	 << __FILE__ << ":" << __LINE__ << endl;
-    BOOST_FOREACH(string const& fname, m_feature_names)
-      cerr << fname << endl;
-#endif
-    UTIL_THROW_IF2(num_feats != this->m_numScoreComponents,
-		   "At " << __FILE__ << ":" << __LINE__
-		   << ": number of feature values provided by Phrase table (" 
-		   << num_feats << ") does not match number specified in "
-		   << "Moses config file (" << this->m_numScoreComponents 
-		   << ")!\n";);
-    
-    
-    btdyn.reset(new imBitext<Token>(btfix.V1, btfix.V2,m_default_sample_size));
+    btdyn.reset(new imbitext(btfix.V1, btfix.V2, m_default_sample_size));
     btdyn->num_workers = this->m_workers;
     if (extra_data.size()) 
-      {
-	load_extra_data(extra_data);
-      }
+      load_extra_data(extra_data,false);
     
 #if 0
     // currently not used
@@ -330,258 +402,345 @@ namespace Moses
 
   TargetPhrase* 
   Mmsapt::
-  createTargetPhrase(Phrase        const& src, 
-		     Bitext<Token> const& bt, 
-		     PhrasePair    const& pp) const
+  mkTPhrase(Phrase const& src,
+	    PhrasePair<Token>* fix, 
+	    PhrasePair<Token>* dyn, 
+	    sptr<Bitext<Token> > const& dynbt) const
   {
-    Word w; uint32_t sid,off,len;    
+    UTIL_THROW_IF2(!fix && !dyn, HERE << 
+		   ": Can't create target phrase from nothing.");
+    vector<float> fvals(this->m_numScoreComponents);
+    PhrasePair<Token> pool = fix ? *fix : *dyn;
+    if (fix) 
+      {
+	BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_fix)
+	  (*ff)(btfix, *fix, &fvals);
+      }
+    if (dyn)
+      {
+	BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_dyn)
+	  (*ff)(*dynbt, *dyn, &fvals);
+      }
+    
+    if (fix && dyn) { pool += *dyn; }
+    else if (fix)
+      {
+	PhrasePair<Token> zilch; zilch.init();
+	TSA<Token>::tree_iterator m(dynbt->I2.get(), fix->start2, fix->len2);
+	if (m.size() == fix->len2)
+	  zilch.raw2 = m.approxOccurrenceCount();
+	pool += zilch;
+	BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_dyn)
+	  (*ff)(*dynbt, ff->allowPooling() ? pool : zilch, &fvals);
+      }
+    else if (dyn)
+      {
+	PhrasePair<Token> zilch; zilch.init();
+	TSA<Token>::tree_iterator m(btfix.I2.get(), dyn->start2, dyn->len2);
+	if (m.size() == dyn->len2)
+	  zilch.raw2 = m.approxOccurrenceCount();
+	pool += zilch;
+	BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_fix)
+	  (*ff)(*dynbt, ff->allowPooling() ? pool : zilch, &fvals);
+      }
+    if (fix) 
+      {
+ 	BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_common)
+	  (*ff)(btfix, pool, &fvals);
+      }
+    else
+      {
+ 	BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_common)
+	  (*ff)(*dynbt, pool, &fvals);
+      }
     TargetPhrase* tp = new TargetPhrase();
-    parse_pid(pp.p2, sid, off, len);
-    Token const* x = bt.T2->sntStart(sid) + off;
-    for (uint32_t k = 0; k < len; ++k)
+    Token const* x = fix ? fix->start2 : dyn->start2;
+    uint32_t len = fix ? fix->len2 : dyn->len2;
+    for (uint32_t k = 0; k < len; ++k, x = x->next())
       {
-	// cerr << (*bt.V2)[x[k].id()] << " at " << __FILE__ << ":" << __LINE__ << endl;
-	StringPiece wrd = (*bt.V2)[x[k].id()];
-	// if ((off+len) > bt.T2->sntLen(sid))
-	// cerr << off << ";" << len << " " << bt.T2->sntLen(sid) << endl;
-	assert(off+len <= bt.T2->sntLen(sid));
-	w.CreateFromString(Output,ofactor,wrd,false);
+	StringPiece wrd = (*(btfix.V2))[x->id()];
+	Word w; w.CreateFromString(Output,ofactor,wrd,false);
 	tp->AddWord(w);
       }
-    tp->GetScoreBreakdown().Assign(this, pp.fvals);
+    tp->GetScoreBreakdown().Assign(this, fvals);
     tp->Evaluate(src);
     return tp;
   }
 
-  // process phrase stats from a single parallel corpus
-  void
-  Mmsapt::
-  process_pstats
-  (Phrase   const& src,
-   uint64_t const  pid1, 
-   pstats   const& stats, 
-   Bitext<Token> const & bt, 
-   TargetPhraseCollection* tpcoll
-   ) const
-  {
-    PhrasePair pp;   
-    pp.init(pid1, stats, this->m_numScoreComponents);
-    pstats::trg_map_t::const_iterator t;
-    for (t = stats.trg.begin(); t != stats.trg.end(); ++t)
-      {
-   	pp.update(t->first,t->second);
-	BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_fix)
-	  (*ff)(bt,pp);
-	BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_common)
-	  (*ff)(bt,pp);
-	tpcoll->Add(createTargetPhrase(src,bt,pp));
-      }
-  }
+  // TargetPhrase* 
+  // Mmsapt::
+  // mkTPhrase(Phrase        const& src, 
+  // 		     Bitext<Token> const& bt, 
+  // 		     PhrasePair    const& pp) const
+  // {
+  //   Word w; uint32_t sid,off,len;    
+  //   TargetPhrase* tp = new TargetPhrase();
+  //   parse_pid(pp.p2, sid, off, len);
+  //   Token const* x = bt.T2->sntStart(sid) + off;
+  //   for (uint32_t k = 0; k < len; ++k)
+  //     {
+  // 	// cerr << (*bt.V2)[x[k].id()] << " at " << __FILE__ << ":" << __LINE__ << endl;
+  // 	StringPiece wrd = (*bt.V2)[x[k].id()];
+  // 	// if ((off+len) > bt.T2->sntLen(sid))
+  // 	// cerr << off << ";" << len << " " << bt.T2->sntLen(sid) << endl;
+  // 	assert(off+len <= bt.T2->sntLen(sid));
+  // 	w.CreateFromString(Output,ofactor,wrd,false);
+  // 	tp->AddWord(w);
+  //     }
+  //   tp->GetScoreBreakdown().Assign(this, pp.fvals);
+  //   tp->Evaluate(src);
+  //   return tp;
+  // }
+
+  // // process phrase stats from a single parallel corpus
+  // void
+  // Mmsapt::
+  // process_pstats
+  // (Phrase   const& src,
+  //  uint64_t const  pid1, 
+  //  pstats   const& stats, 
+  //  Bitext<Token> const & bt, 
+  //  TargetPhraseCollection* tpcoll
+  //  ) const
+  // {
+  //   PhrasePair pp;   
+  //   pp.init(pid1, stats, this->m_numScoreComponents);
+  //   pstats::trg_map_t::const_iterator t;
+  //   for (t = stats.trg.begin(); t != stats.trg.end(); ++t)
+  //     {
+  //  	pp.update(t->first,t->second);
+  // 	BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_fix)
+  // 	  (*ff)(bt,pp);
+  // 	BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_common)
+  // 	  (*ff)(bt,pp);
+  // 	tpcoll->Add(mkTPhrase(src,bt,pp));
+  //     }
+  // }
+
+  // void
+  // Mmsapt::
+  // ScorePPfix(PhrasePair& pp) const
+  // {
+  //   BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_fix)
+  //     (*ff)(btfix,pp);
+  //   BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_common)
+  //     (*ff)(btfix,pp);
+  // }
+
+//   // process phrase stats from a single parallel corpus
+//   bool
+//   Mmsapt::
+//   pool_pstats(Phrase   const& src,
+// 	      uint64_t const  pid1a, 
+// 	      pstats        * statsa, 
+// 	      Bitext<Token> const & bta,
+// 	      uint64_t const  pid1b, 
+// 	      pstats   const* statsb, 
+// 	      Bitext<Token> const & btb,
+// 	      TargetPhraseCollection* tpcoll) const
+//   {
+//     PhrasePair pp;
+//     if (statsa && statsb)
+//       pp.init(pid1b, *statsa, *statsb, this->m_numScoreComponents);
+//     else if (statsa)
+//       pp.init(pid1a, *statsa, this->m_numScoreComponents);
+//     else if (statsb)
+//       pp.init(pid1b, *statsb, this->m_numScoreComponents);
+//     else return false; // throw "no stats for pooling available!";
+
+//     pstats::trg_map_t::const_iterator b;
+//     pstats::trg_map_t::iterator a;
+//     if (statsb)
+//       {
+// 	for (b = statsb->trg.begin(); b != statsb->trg.end(); ++b)
+// 	  {
+// 	    uint32_t sid,off,len;    
+// 	    parse_pid(b->first, sid, off, len);
+// 	    Token const* x = btb.T2->sntStart(sid) + off;
+// 	    TSA<Token>::tree_iterator m(bta.I2.get(),x,x+len);
+// 	    if (m.size() == len) 
+// 	      {
+// 		;
+// 		if (statsa && ((a = statsa->trg.find(m.getPid())) 
+// 			       != statsa->trg.end()))
+// 		  {
+// 		    pp.update(b->first,a->second,b->second);
+// 		    a->second.invalidate();
+// 		  }
+// 		else 
+// 		  pp.update(b->first,m.approxOccurrenceCount(),
+// 			    b->second);
+// 	      }
+// 	    else pp.update(b->first,b->second);
+// 	    BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_fix)
+// 	      (*ff)(btb,pp);
+// 	    BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_common)
+// 	      (*ff)(btb,pp);
+// 	    tpcoll->Add(mkTPhrase(src,btb,pp));
+// 	  }
+//       }
+//     if (!statsa) return statsb != NULL;
+//     for (a = statsa->trg.begin(); a != statsa->trg.end(); ++a)
+//       {
+// 	uint32_t sid,off,len;
+// 	if (!a->second.valid()) continue;
+// 	parse_pid(a->first, sid, off, len);
+// 	if (btb.T2)
+// 	  {
+// 	    Token const* x = bta.T2->sntStart(sid) + off;
+// 	    TSA<Token>::tree_iterator m(btb.I2.get(), x, len);
+// 	    if (m.size() == len) 
+// 	      pp.update(a->first,m.approxOccurrenceCount(),a->second);
+// 	    else 
+// 	      pp.update(a->first,a->second);
+// 	  }
+// 	else pp.update(a->first,a->second);
+// #if 0
+// 	// jstats const& j = a->second;
+// 	cerr << bta.T1->pid2str(bta.V1.get(),pp.p1) << " ::: " 
+// 	     << bta.T2->pid2str(bta.V2.get(),pp.p2) << endl;
+// 	cerr << pp.raw1 << " " << pp.sample1 << " " << pp.good1 << " " 
+// 	     << pp.joint << " " << pp.raw2 << endl;
+// #endif
+
+// 	UTIL_THROW_IF2(pp.raw2 == 0, 
+// 		       "OOPS" << bta.T1->pid2str(bta.V1.get(),pp.p1) << " ::: " 
+// 		       << bta.T2->pid2str(bta.V2.get(),pp.p2) << ": "
+// 		       << pp.raw1 << " " << pp.sample1 << " " 
+// 		       << pp.good1 << " " << pp.joint << " " 
+// 		       << pp.raw2);
+// 	BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_fix)
+// 	  (*ff)(bta,pp);
+// 	BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_common)
+// 	  (*ff)(bta,pp);
+// 	tpcoll->Add(mkTPhrase(src,bta,pp));
+//       }
+//     return true;
+//   }
 
-  void
-  Mmsapt::
-  ScorePPfix(bitext::PhrasePair& pp) const
-  {
-    BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_fix)
-      (*ff)(btfix,pp);
-    BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_common)
-      (*ff)(btfix,pp);
-  }
 
-  // process phrase stats from a single parallel corpus
-  bool
-  Mmsapt::
-  pool_pstats(Phrase   const& src,
-	      uint64_t const  pid1a, 
-	      pstats        * statsa, 
-	      Bitext<Token> const & bta,
-	      uint64_t const  pid1b, 
-	      pstats   const* statsb, 
-	      Bitext<Token> const & btb,
-	      TargetPhraseCollection* tpcoll) const
-  {
-    PhrasePair pp;
-    if (statsa && statsb)
-      pp.init(pid1b, *statsa, *statsb, this->m_numScoreComponents);
-    else if (statsa)
-      pp.init(pid1a, *statsa, this->m_numScoreComponents);
-    else if (statsb)
-      pp.init(pid1b, *statsb, this->m_numScoreComponents);
-    else return false; // throw "no stats for pooling available!";
-
-    pstats::trg_map_t::const_iterator b;
-    pstats::trg_map_t::iterator a;
-    if (statsb)
-      {
-	for (b = statsb->trg.begin(); b != statsb->trg.end(); ++b)
-	  {
-	    uint32_t sid,off,len;    
-	    parse_pid(b->first, sid, off, len);
-	    Token const* x = bta.T2->sntStart(sid) + off;
-	    TSA<Token>::tree_iterator m(bta.I2.get(),x,x+len);
-	    if (m.size() == len) 
-	      {
-		;
-		if (statsa && ((a = statsa->trg.find(m.getPid())) 
-			       != statsa->trg.end()))
-		  {
-		    pp.update(b->first,a->second,b->second);
-		    a->second.invalidate();
-		  }
-		else 
-		  pp.update(b->first,m.approxOccurrenceCount(),
-			    b->second);
-	      }
-	    else pp.update(b->first,b->second);
-	    BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_fix)
-	      (*ff)(btb,pp);
-	    BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_common)
-	      (*ff)(btb,pp);
-	    tpcoll->Add(createTargetPhrase(src,btb,pp));
-	  }
-      }
-    if (!statsa) return statsb != NULL;
-    for (a = statsa->trg.begin(); a != statsa->trg.end(); ++a)
-      {
-	uint32_t sid,off,len;
-	if (!a->second.valid()) continue;
-	parse_pid(a->first, sid, off, len);
-	if (btb.T2)
-	  {
-	    Token const* x = bta.T2->sntStart(sid) + off;
-	    TSA<Token>::tree_iterator m(btb.I2.get(), x, x+len);
-	    if (m.size() == len) 
-	      pp.update(a->first,m.approxOccurrenceCount(),a->second);
-	    else 
-	      pp.update(a->first,a->second);
-	  }
-	else 
-	  pp.update(a->first,a->second);
-#if 0
-	// jstats const& j = a->second;
-	cerr << bta.T1->pid2str(bta.V1.get(),pp.p1) << " ::: " 
-	     << bta.T2->pid2str(bta.V2.get(),pp.p2) << endl;
-	cerr << pp.raw1 << " " << pp.sample1 << " " << pp.good1 << " " 
-	     << pp.joint << " " << pp.raw2 << endl;
-#endif
 
-	UTIL_THROW_IF2(pp.raw2 == 0, 
-		       "OOPS" << bta.T1->pid2str(bta.V1.get(),pp.p1) << " ::: " 
-		       << bta.T2->pid2str(bta.V2.get(),pp.p2) << ": "
-		       << pp.raw1 << " " << pp.sample1 << " " 
-		       << pp.good1 << " " << pp.joint << " " 
-		       << pp.raw2);
-	BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_fix)
-	  (*ff)(bta,pp);
-	BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_common)
-	  (*ff)(bta,pp);
-	tpcoll->Add(createTargetPhrase(src,bta,pp));
-      }
-    return true;
-  }
   
-  
-  // process phrase stats from a single parallel corpus
-  bool
-  Mmsapt::
-  combine_pstats
-  (Phrase   const& src,
-   uint64_t const  pid1a, pstats      * statsa, Bitext<Token> const & bta,
-   uint64_t const  pid1b, pstats const* statsb, Bitext<Token> const & btb,
-   TargetPhraseCollection* tpcoll) const
-  {
-    PhrasePair ppfix,ppdyn,pool; 
-    // ppfix: counts from btfix
-    // ppdyn: counts from btdyn
-    // pool: pooled counts from both
-    Word w;
-    if (statsa) ppfix.init(pid1a,*statsa,this->m_numScoreComponents);
-    if (statsb) ppdyn.init(pid1b,*statsb,this->m_numScoreComponents);
-    pstats::trg_map_t::const_iterator b;
-    pstats::trg_map_t::iterator a;
-
-    if (statsb)
-      {
-	pool.init(pid1b,*statsb,0);
-	for (b = statsb->trg.begin(); b != statsb->trg.end(); ++b)
-	  {
-	    ppdyn.update(b->first,b->second);
-	    BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_dyn)
-	      (*ff)(btb,ppdyn);
+  // // process phrase stats from a single parallel corpus
+  // bool
+  // Mmsapt::
+  // combine_pstats
+  // (Phrase   const& src,
+  //  uint64_t const  pid1a, pstats      * statsa, Bitext<Token> const & bta,
+  //  uint64_t const  pid1b, pstats const* statsb, Bitext<Token> const & btb,
+  //  TargetPhraseCollection* tpcoll) const
+  // {
+  //   if (!statsa && !statsb) return false; 
+
+  //   PhrasePair ppfix,ppdyn,pool; Word w;
+  //   // ppfix: counts from btfix
+  //   // ppdyn: counts from btdyn
+  //   // pool: pooled counts from both
+
+  //   pstats::trg_map_t::const_iterator b;
+  //   pstats::trg_map_t::iterator a;
+
+    
+  //   set<uint64_t> check;
+  //   if (statsb)
+  //     {
+  // 	ppdyn.init(pid1b,*statsb,this->m_numScoreComponents);
+  // 	if (statsa)
+  // 	  {
+  // 	    pool.init(pid1b, *statsa, *statsb, 0);
+  // 	    ppfix.init(pid1a,*statsa, 0);
+  // 	  }
+  // 	else 
+  // 	  {
+  // 	    pool.init(pid1b, *statsb,0);
+  // 	    ppfix.init();
+  // 	  }
+	
+  // 	for (b = statsb->trg.begin(); b != statsb->trg.end(); ++b)
+  // 	  {
+  // 	    ppdyn.update(b->first,b->second);
+  // 	    BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_dyn)
+  // 	      (*ff)(btb,ppdyn);
 	    
-	    uint32_t sid,off,len;    
-	    parse_pid(b->first, sid, off, len);
-	    Token const* x = bta.T2->sntStart(sid) + off;
-	    TSA<Token>::tree_iterator m(bta.I2.get(),x,x+len);
+  // 	    uint32_t sid,off,len;    
+  // 	    parse_pid(b->first, sid, off, len);
+  // 	    Token const* x = btb.T2->sntStart(sid) + off;
+  // 	    TSA<Token>::tree_iterator m(bta.I2.get(),x,len);
 	    
-	    if (m.size() && statsa && 
-		((a = statsa->trg.find(m.getPid())) != statsa->trg.end()))
-	      {
-		// phrase pair found also in btfix
-		ppfix.update(a->first,a->second);
-		BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_fix)
-		  (*ff)(bta,ppfix,&ppdyn.fvals);
-		BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_common)
-		  (*ff)(bta,ppfix,&ppdyn.fvals);
-		a->second.invalidate();
-	      }
-	    else 
-	      {
-		// phrase pair was not found in btfix
-
-		// ... but the source phrase was  
-		if (m.size()) 
-		  pool.update(b->first,m.approxOccurrenceCount(), b->second);
-
-		// ... and not even the source phrase 
-		else 
-		  pool.update(b->first,b->second);
-		
-		BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_fix)
-		  (*ff)(btb,pool,&ppdyn.fvals);
-		BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_common)
-		  (*ff)(btb,pool,&ppdyn.fvals);
-		
-	      }
-
-	    tpcoll->Add(createTargetPhrase(src,btb,ppdyn));
-	  }
-      }
-
-    // now deal with all phraise pairs that are ONLY in btfix
-    // (the ones that are in both were dealt with above)
-    if (statsa)
-      {
-	pool.init(pid1a,*statsa,0);
-	for (a = statsa->trg.begin(); a != statsa->trg.end(); ++a)
-	  {
-	    if (!a->second.valid()) continue; // done above
-	    ppfix.update(a->first,a->second);
-	    BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_fix)
-	      (*ff)(bta,ppfix);
-	    BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_common)
-	      (*ff)(bta,ppfix);
+  // 	    Token const* y = m.getToken(0);
+  // 	    for (size_t i = 0; i < len; ++i)
+  // 	      cout << x[i].id() << " " << endl;
+  // 	    for (size_t i = 0; i < m.size(); ++i)
+  // 	      cout << y[i].id() << " " << endl;
 	    
-	    if (btb.I2)
-	      {
-		uint32_t sid,off,len;    
-		parse_pid(a->first, sid, off, len);
-		Token const* x = bta.T2->sntStart(sid) + off;
-		TSA<Token>::tree_iterator m(btb.I2.get(),x,x+len);
-		if (m.size())
-		  pool.update(a->first,m.approxOccurrenceCount(),a->second);
-		else
-		  pool.update(a->first,a->second);
-	      }
-	    else pool.update(a->first,a->second);
-	    BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_dyn)
-	      (*ff)(btb,pool,&ppfix.fvals);
-	    if (ppfix.p2)
-	      tpcoll->Add(createTargetPhrase(src,bta,ppfix));
-	  }
-      }
-    return (statsa || statsb);
-  }
+  // 	    if (statsa && m.size() &&  
+  // 		((a = statsa->trg.find(m.getPid())) != statsa->trg.end()))
+  // 	      { // i.e., phrase pair found also in btfix
+  // 		ppfix.update(a->first,a->second);
+  // 		pool.update(b->first, b->second, a->second);
+  // 		BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_fix)
+  // 		  (*ff)(bta, ppfix, &ppdyn.fvals);
+  // 		check.insert(a->first); 
+  // 	      }
+  // 	    else // phrase pair was not found in btfix
+  // 	      {
+  // 		if (m.size()) // ... but the source phrase was  
+  // 		  {
+  // 		    pool.update(b->first, m.approxOccurrenceCount(), b->second);
+  // 		    ppfix.update(b->first,m.approxOccurrenceCount());
+  // 		  }
+  // 		else // ... and not even the source phrase 
+  // 		  {
+  // 		    pool.update(b->first, b->second);
+  // 		    ppfix.update(b->first,0);
+  // 		  }		    
+  // 		BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_fix)
+  // 		  (*ff)(btb, ff->allowPooling() ? pool : ppfix, &ppdyn.fvals);
+  // 	      }
+  // 	    BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_common)
+  // 	      (*ff)(btb, pool, &ppdyn.fvals);
+  // 	    tpcoll->Add(mkTPhrase(src,btb,ppdyn));
+  // 	  }
+  //     }
+
+  //   // now deal with all phrase pairs that are ONLY in btfix
+  //   // (the ones that are in both were dealt with above)
+  //   if (statsa)
+  //     {
+  // 	ppfix.init(pid1a, *statsa, this->m_numScoreComponents);
+  // 	pool.init(pid1a,  *statsa, 0);
+  // 	ppdyn.init();
+  // 	for (a = statsa->trg.begin(); a != statsa->trg.end(); ++a)
+  // 	  {
+  // 	    if (check.find(a->first) != check.end()) 
+  // 	      continue;
+
+  // 	    ppfix.update(a->first, a->second);
+  // 	    BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_fix)
+  // 	      (*ff)(bta, ppfix);
+	    
+  // 	    if (btb.I2)
+  // 	      {
+  // 		uint32_t sid,off,len;    
+  // 		parse_pid(a->first, sid, off, len);
+  // 		Token const* x = bta.T2->sntStart(sid) + off;
+  // 		TSA<Token>::tree_iterator m(btb.I2.get(), x, len);
+  // 		if (m.size())
+  // 		  pool.update(a->first, m.approxOccurrenceCount(), a->second);
+  // 		else
+  // 		  pool.update(a->first, a->second);
+  // 	      }
+  // 	    else pool.update(a->first, a->second);
+  // 	    BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_dyn)
+  // 	      (*ff)(btb, ff->allowPooling() ? pool : ppdyn, &ppfix.fvals);
+  // 	    BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_common)
+  // 	      (*ff)(bta, pool, &ppfix.fvals);
+  // 	    if (ppfix.p2)
+  // 	      tpcoll->Add(mkTPhrase(src, bta, ppfix));
+  // 	  }
+  //     }
+  //   return true;
+  // }
   
   Mmsapt::
   TargetPhraseCollectionWrapper::
@@ -595,8 +754,34 @@ namespace Moses
   {
     assert(this->refCount == 0);
   }
-
   
+  template<typename Token>
+  void 
+  expand(typename Bitext<Token>::iter const& m, 
+	 Bitext<Token> const& bt, 
+	 pstats const& ps, vector<PhrasePair<Token> >& dest)
+  {
+    dest.reserve(ps.trg.size());
+    PhrasePair<Token> pp;
+    pp.init(m.getToken(0), m.size(), &ps, 0);
+    // cout << HERE << " " << toString(*(bt.V1),pp.start1,pp.len1) << endl;
+    pstats::trg_map_t::const_iterator a;
+    for (a = ps.trg.begin(); a != ps.trg.end(); ++a)
+      {
+	uint32_t sid,off,len;
+	parse_pid(a->first, sid, off, len);
+	pp.update(bt.T2->sntStart(sid)+off, len, a->second);
+	dest.push_back(pp);
+      }
+    typename PhrasePair<Token>::SortByTargetIdSeq sorter;
+    sort(dest.begin(), dest.end(),sorter);
+#if 0
+    BOOST_FOREACH(PhrasePair<Token> const& p, dest)
+      cout << toString (*bt.V1,p.start1,p.len1) << " ::: " 
+	   << toString (*bt.V2,p.start2,p.len2) << " " 
+	   << p.joint << endl;
+#endif
+  }
 
   // This is not the most efficient way of phrase lookup! 
   TargetPhraseCollection const* 
@@ -605,13 +790,9 @@ namespace Moses
   {
     // map from Moses Phrase to internal id sequence
     vector<id_type> sphrase; 
-    fillIdSeq(src,input_factor,*btfix.V1,sphrase);
+    fillIdSeq(src,input_factor,*(btfix.V1),sphrase);
     if (sphrase.size() == 0) return NULL;
     
-    // lookup in static bitext 
-    TSA<Token>::tree_iterator mfix(btfix.I1.get(),&sphrase[0],sphrase.size());
-
-    // lookup in dynamic bitext
     // Reserve a local copy of the dynamic bitext in its current form. /btdyn/
     // is set to a new copy of the dynamic bitext every time a sentence pair
     // is added. /dyn/ keeps the old bitext around as long as we need it.
@@ -621,12 +802,13 @@ namespace Moses
       dyn = btdyn;
     }
     assert(dyn);
+
+    // lookup phrases in both bitexts
+    TSA<Token>::tree_iterator mfix(btfix.I1.get(), &sphrase[0], sphrase.size());
     TSA<Token>::tree_iterator mdyn(dyn->I1.get());
     if (dyn->I1.get())
-      {
-	for (size_t i = 0; mdyn.size() == i && i < sphrase.size(); ++i)
-	  mdyn.extend(sphrase[i]);
-      }
+      for (size_t i = 0; mdyn.size() == i && i < sphrase.size(); ++i)
+	mdyn.extend(sphrase[i]);
 
 #if 0
     cerr << src << endl;
@@ -634,43 +816,62 @@ namespace Moses
 	 << mdyn.size() << " " << mdyn.getPid() << endl;
 #endif
 
-    // phrase not found in either
-    if (mdyn.size() != sphrase.size() && 
-	mfix.size() != sphrase.size()) 
-      return NULL; // not found
+    if (mdyn.size() != sphrase.size() && mfix.size() != sphrase.size()) 
+      return NULL; // phrase not found in either bitext
 
     // cache lookup:
-
-    uint64_t phrasekey;
-    if (mfix.size() == sphrase.size())
-      phrasekey = (mfix.getPid()<<1);
-    else
-      phrasekey = (mdyn.getPid()<<1)+1;
-
+    uint64_t phrasekey = (mfix.size() == sphrase.size() ? (mfix.getPid()<<1) 
+			  : (mdyn.getPid()<<1)+1);
     size_t revision = dyn->revision();
     {
       boost::lock_guard<boost::mutex> guard(this->lock);
       tpc_cache_t::iterator c = m_cache.find(phrasekey);
+      // TO DO: we should revise the revision mechanism: we take the length
+      // of the dynamic bitext (in sentences) at the time the PT entry
+      // was stored as the time stamp. For each word in the
+      // vocabulary, we also store its most recent occurrence in the
+      // bitext. Only if the timestamp of each word in the phrase is
+      // newer than the timestamp of the phrase itself we must update 
+      // the entry. 
       if (c != m_cache.end() && c->second->revision == revision)
 	return encache(c->second);
     }
     
-    // not found or not up to date
+    // OK: pt entry not found or not up to date
+    // lookup and expansion could be done in parallel threads, 
+    // but ppdyn is probably small anyway
+    // TO DO: have Bitexts return lists of PhrasePairs instead of pstats
+    // no need to expand pstats at every single lookup again, especially 
+    // for btfix.
     sptr<pstats> sfix,sdyn;
-    if (mfix.size() == sphrase.size())
-      sfix = btfix.lookup(mfix);
-    if (mdyn.size() == sphrase.size())
-      sdyn = dyn->lookup(mdyn);
+    if (mfix.size() == sphrase.size()) sfix = btfix.lookup(mfix);
+    if (mdyn.size() == sphrase.size()) sdyn = dyn->lookup(mdyn);
+
+    vector<PhrasePair<Token> > ppfix,ppdyn;
+    if (sfix) expand(mfix, btfix, *sfix, ppfix);
+    if (sdyn) expand(mdyn, *dyn, *sdyn, ppdyn);
     
-    TargetPhraseCollectionWrapper* 
-      ret = new TargetPhraseCollectionWrapper(revision,phrasekey);
-    if ((poolCounts && 
-	 pool_pstats(src, mfix.getPid(),sfix.get(),btfix, 
-		     mdyn.getPid(),sdyn.get(),*dyn,ret))
-	|| combine_pstats(src, mfix.getPid(),sfix.get(),btfix, 
-			  mdyn.getPid(),sdyn.get(),*dyn,ret))
+    // now we have two lists of Phrase Pairs, let's merge them
+    TargetPhraseCollectionWrapper* ret;
+    ret = new TargetPhraseCollectionWrapper(revision,phrasekey);
+    PhrasePair<Token>::SortByTargetIdSeq sorter;
+    size_t i = 0; size_t k = 0;
+    while (i < ppfix.size() && k < ppdyn.size())
+      {
+	int cmp = sorter.cmp(ppfix[i], ppdyn[k]);
+	if      (cmp  < 0) ret->Add(mkTPhrase(src,&ppfix[i++],NULL,dyn));
+	else if (cmp == 0) ret->Add(mkTPhrase(src,&ppfix[i++],&ppdyn[k++],dyn));
+	else               ret->Add(mkTPhrase(src,NULL,&ppdyn[k++],dyn));
+      }
+    while (i < ppfix.size()) ret->Add(mkTPhrase(src,&ppfix[i++],NULL,dyn));
+    while (k < ppdyn.size()) ret->Add(mkTPhrase(src,NULL,&ppdyn[k++],dyn));
+    if (m_tableLimit) ret->Prune(true, m_tableLimit);
+    else ret->Prune(true,ret->GetSize());
+#if 0
+    if (combine_pstats(src, 
+		       mfix.getPid(), sfix.get(), btfix, 
+		       mdyn.getPid(), sdyn.get(),  *dyn, ret))
       {
-	if (m_tableLimit) ret->Prune(true,m_tableLimit);
 #if 0
 	sort(ret->begin(), ret->end(), CompareTargetPhrase());
 	cout << "SOURCE PHRASE: " << src << endl;
@@ -686,6 +887,9 @@ namespace Moses
 	  }
 #endif
       }
+#endif
+
+    // put the result in the cache and return
     boost::lock_guard<boost::mutex> guard(this->lock);
     m_cache[phrasekey] = ret;
     return encache(ret);
@@ -839,6 +1043,7 @@ namespace Moses
     TSA<Token>::tree_iterator mfix(btfix.I1.get(),&myphrase[0],myphrase.size());
     if (mfix.size() == myphrase.size()) 
       {
+	btfix.prep(mfix);
 	// cerr << phrase << " " << mfix.approxOccurrenceCount() << endl;
 	return true;
       }
@@ -854,6 +1059,7 @@ namespace Moses
       {
 	for (size_t i = 0; mdyn.size() == i && i < myphrase.size(); ++i)
 	  mdyn.extend(myphrase[i]);
+	if (mdyn.size() == myphrase.size()) dyn->prep(mdyn);
       }
     return mdyn.size() == myphrase.size();
   }
diff --git a/moses/TranslationModel/UG/mmsapt.h b/moses/TranslationModel/UG/mmsapt.h
index b6be361313..a7ece8fdb2 100644
--- a/moses/TranslationModel/UG/mmsapt.h
+++ b/moses/TranslationModel/UG/mmsapt.h
@@ -19,6 +19,7 @@
 #include "moses/TranslationModel/UG/mm/ug_typedefs.h"
 #include "moses/TranslationModel/UG/mm/tpt_pickler.h"
 #include "moses/TranslationModel/UG/mm/ug_bitext.h"
+#include "moses/TranslationModel/UG/mm/ug_phrasepair.h"
 #include "moses/TranslationModel/UG/mm/ug_lexical_phrase_scorer2.h"
 
 #include "moses/InputFileStream.h"
@@ -29,7 +30,8 @@
 #include <map>
 
 #include "moses/TranslationModel/PhraseDictionary.h"
-#include "mmsapt_phrase_scorers.h"
+#include "mmsapt_phrase_scorers.h" // deprecated
+#include "sapt_phrase_scorers.h"
 
 // TO DO:
 // - make lexical phrase scorer take addition to the "dynamic overlay" into account
@@ -47,47 +49,68 @@ namespace Moses
 #endif
   {
     friend class Alignment;
+    map<string,string> param;
   public:    
     typedef L2R_Token<SimpleWordId> Token;
     typedef mmBitext<Token> mmbitext;
     typedef imBitext<Token> imbitext;
+    typedef Bitext<Token>     bitext;
     typedef TSA<Token>           tsa;
     typedef PhraseScorer<Token> pscorer;
+
   private:
+    // vector<sptr<bitext> > shards;
     mmbitext btfix; 
-    sptr<imbitext> btdyn;
+    sptr<imbitext> btdyn; 
     string bname,extra_data;
     string L1;
     string L2;
-    float  m_lbop_parameter;
-    float  m_lex_alpha; 
+    float  m_lbop_conf; // confidence level for lbop smoothing
+    float  m_lex_alpha; // alpha parameter (j+a)/(m+a) for lexical smoothing
     // alpha parameter for lexical smoothing (joint+alpha)/(marg + alpha)
     // must be > 0 if dynamic 
     size_t m_default_sample_size;
     size_t m_workers;  // number of worker threads for sampling the bitexts
 
-    // deprecated!
-    char m_pfwd_denom; // denominator for computation of fwd phrase score:
-    // 'r' - divide by raw count
-    // 's' - divide by sample count
-    // 'g' - devide by number of "good" (i.e. coherent) samples 
-    // size_t num_features;
+    // // deprecated!
+    // char m_pfwd_denom; // denominator for computation of fwd phrase score:
+    // // 'r' - divide by raw count
+    // // 's' - divide by sample count
+    // // 'g' - divide by number of "good" (i.e. coherent) samples 
+    // // size_t num_features;
 
     size_t input_factor;
     size_t output_factor; // we can actually return entire Tokens!
 
-    bool withLogCountFeatures; // add logs of counts as features?
-    bool withCoherence; 
-    string m_pfwd_features; // which pfwd functions to use
-    string m_pbwd_features; // which pbwd functions to use
+    // bool withLogCountFeatures; // add logs of counts as features?
+    // bool withCoherence; 
+    // string m_pfwd_features; // which pfwd functions to use
+    // string m_pbwd_features; // which pbwd functions to use
+
+    // for display for human inspection (ttable dumps):
     vector<string> m_feature_names; // names of features activated
+    vector<bool> m_is_logval;  // keeps track of which features are log valued 
+    vector<bool> m_is_integer; // keeps track of which features are integer valued 
+
     vector<sptr<pscorer > > m_active_ff_fix; // activated feature functions (fix)
     vector<sptr<pscorer > > m_active_ff_dyn; // activated feature functions (dyn)
     vector<sptr<pscorer > > m_active_ff_common; // activated feature functions (dyn)
 
-    size_t
-    add_corpus_specific_features
-    (vector<sptr<pscorer > >& ffvec, size_t num_feats);
+    void
+    register_ff(sptr<pscorer> const& ff, vector<sptr<pscorer> > & registry);
+
+    template<typename fftype>
+    void 
+    check_ff(string const ffname,vector<sptr<pscorer> >* registry = NULL);
+    // add feature function if specified 
+    
+    template<typename fftype>
+    void 
+    check_ff(string const ffname, float const xtra, vector<sptr<pscorer> >* registry = NULL);
+    // add feature function if specified
+
+    void
+    add_corpus_specific_features(vector<sptr<pscorer > >& ffvec);
     
     // built-in feature functions
     // PScorePfwd<Token> calc_pfwd_fix, calc_pfwd_dyn;
@@ -140,12 +163,24 @@ namespace Moses
     mm2dtable_t COOCraw;
 
     TargetPhrase* 
-    createTargetPhrase
+    mkTPhrase(Phrase const& src, 
+	      Moses::bitext::PhrasePair<Token>* fix, 
+	      Moses::bitext::PhrasePair<Token>* dyn, 
+	      sptr<Bitext<Token> > const& dynbt) const;
+
+    // template<typename Token>
+    // void 
+    // expand(typename Bitext<Token>::iter const& m, Bitext<Token> const& bt, 
+    // 	   pstats const& pstats, vector<PhrasePair<Token> >& dest);
+    
+#if 0
+    TargetPhrase* 
+    mkTPhrase
     (Phrase        const& src, 
      Bitext<Token> const& bt, 
-     bitext::PhrasePair    const& pp
+     Moses::bitext::PhrasePair const& pp
      ) const;
-
+#endif
     void
     process_pstats
     (Phrase   const& src,
@@ -180,7 +215,7 @@ namespace Moses
      ) const;
 
     void
-    load_extra_data(string bname);
+    load_extra_data(string bname, bool locking);
 
     mutable size_t m_tpc_ctr;
   public:
@@ -231,8 +266,14 @@ namespace Moses
     vector<string> const&
     GetFeatureNames() const;
     
-    void
-    ScorePPfix(bitext::PhrasePair& pp) const;
+    // void
+    // ScorePPfix(bitext::PhrasePair& pp) const;
+
+    bool
+    isLogVal(int i) const;
+    
+    bool
+    isInteger(int i) const;
 
   private:
   };
diff --git a/moses/TranslationModel/UG/mmsapt_align.cc b/moses/TranslationModel/UG/mmsapt_align.cc
index 407df648d8..8b6bf1eb2f 100644
--- a/moses/TranslationModel/UG/mmsapt_align.cc
+++ b/moses/TranslationModel/UG/mmsapt_align.cc
@@ -1,335 +1,336 @@
 #include "mmsapt.h"
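+// NOTE: currently broken and therefore commented out; the code below still uses the non-template PhrasePair and Mmsapt::ScorePPfix(), which this change disables.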
 
-namespace Moses
-{
-  using namespace bitext;
-  using namespace std;
-  using namespace boost;
+// namespace Moses
+// {
+//   using namespace bitext;
+//   using namespace std;
+//   using namespace boost;
   
-  struct PPgreater
-  {
-    bool operator()(PhrasePair const& a, PhrasePair const& b)
-    {
-      return a.score > b.score;
-    }
-  };
+//   struct PPgreater
+//   {
+//     bool operator()(PhrasePair const& a, PhrasePair const& b)
+//     {
+//       return a.score > b.score;
+//     }
+//   };
 
-  void
-  Mmsapt::
-  setWeights(vector<float> const & w)
-  {
-    assert(w.size() == this->m_numScoreComponents);
-    this->feature_weights = w;
-  }
+//   void
+//   Mmsapt::
+//   setWeights(vector<float> const & w)
+//   {
+//     assert(w.size() == this->m_numScoreComponents);
+//     this->feature_weights = w;
+//   }
 
-  struct PhraseAlnHyp
-  {
-    PhrasePair pp;
-    ushort   s1,e1,s2,e2; // start and end positions
-    int             prev; // preceding alignment hypothesis
-    float          score; 
-    bitvector       scov; // source coverage
-    PhraseAlnHyp(PhrasePair const& ppx, int slen,
-		 pair<uint32_t,uint32_t> const& sspan,
-		 pair<uint32_t,uint32_t> const& tspan)
-      : pp(ppx), prev(-1), score(ppx.score), scov(slen)
-    {
-      s1 = sspan.first; e1 = sspan.second;
-      s2 = tspan.first; e2 = tspan.second;
-      for (size_t i = s1; i < e1; ++i) 
-	scov.set(i);
-    }
+//   struct PhraseAlnHyp
+//   {
+//     PhrasePair pp;
+//     ushort   s1,e1,s2,e2; // start and end positions
+//     int             prev; // preceding alignment hypothesis
+//     float          score; 
+//     bitvector       scov; // source coverage
+//     PhraseAlnHyp(PhrasePair const& ppx, int slen,
+// 		 pair<uint32_t,uint32_t> const& sspan,
+// 		 pair<uint32_t,uint32_t> const& tspan)
+//       : pp(ppx), prev(-1), score(ppx.score), scov(slen)
+//     {
+//       s1 = sspan.first; e1 = sspan.second;
+//       s2 = tspan.first; e2 = tspan.second;
+//       for (size_t i = s1; i < e1; ++i) 
+// 	scov.set(i);
+//     }
 
-    bool operator<(PhraseAlnHyp const& other) const
-    {
-      return this->score < other.score;
-    }
+//     bool operator<(PhraseAlnHyp const& other) const
+//     {
+//       return this->score < other.score;
+//     }
 
-    bool operator>(PhraseAlnHyp const& other) const
-    {
-      return this->score > other.score;
-    }
+//     bool operator>(PhraseAlnHyp const& other) const
+//     {
+//       return this->score > other.score;
+//     }
 
-    PhraseOrientation
-    po_bwd(PhraseAlnHyp const* prev) const
-    {
-      if (s2 == 0) return po_first;
-      assert(prev);
-      assert(prev->e2 <= s2);
-      if (prev->e2 < s2)  return po_other;
-      if (prev->e1 == s1) return po_mono;
-      if (prev->e1 < s1)  return po_jfwd;
-      if (prev->s1 == e1) return po_swap;
-      if (prev->s1 > e1)  return po_jbwd;
-      return po_other;
-    }
+//     PhraseOrientation
+//     po_bwd(PhraseAlnHyp const* prev) const
+//     {
+//       if (s2 == 0) return po_first;
+//       assert(prev);
+//       assert(prev->e2 <= s2);
+//       if (prev->e2 < s2)  return po_other;
+//       if (prev->e1 == s1) return po_mono;
+//       if (prev->e1 < s1)  return po_jfwd;
+//       if (prev->s1 == e1) return po_swap;
+//       if (prev->s1 > e1)  return po_jbwd;
+//       return po_other;
+//     }
 
-    PhraseOrientation
-    po_fwd(PhraseAlnHyp const* next) const
-    {
-      if (!next) return po_last;
-      assert(next->s2 >= e2);
-      if (next->s2 < e2)  return po_other;
-      if (next->e1 == s1) return po_swap;
-      if (next->e1 < s1)  return po_jbwd;
-      if (next->s1 == e1) return po_mono;
-      if (next->s1 > e1)  return po_jfwd;
-      return po_other;
-    }
+//     PhraseOrientation
+//     po_fwd(PhraseAlnHyp const* next) const
+//     {
+//       if (!next) return po_last;
+//       assert(next->s2 >= e2);
+//       if (next->s2 < e2)  return po_other;
+//       if (next->e1 == s1) return po_swap;
+//       if (next->e1 < s1)  return po_jbwd;
+//       if (next->s1 == e1) return po_mono;
+//       if (next->s1 > e1)  return po_jfwd;
+//       return po_other;
+//     }
 
-    float 
-    dprob_fwd(PhraseAlnHyp const& next)
-    {
-      return pp.dfwd[po_fwd(&next)];
-    }
+//     float 
+//     dprob_fwd(PhraseAlnHyp const& next)
+//     {
+//       return pp.dfwd[po_fwd(&next)];
+//     }
 
-    float 
-    dprob_bwd(PhraseAlnHyp const& prev)
-    {
-      return pp.dbwd[po_bwd(&prev)];
-    }
+//     float 
+//     dprob_bwd(PhraseAlnHyp const& prev)
+//     {
+//       return pp.dbwd[po_bwd(&prev)];
+//     }
 
-  };
+//   };
 
-  class Alignment
-  {
-    typedef L2R_Token<SimpleWordId> Token;
-    typedef TSA<Token>           tsa;
-    typedef pair<uint32_t, uint32_t>  span;
-    typedef vector<vector<uint64_t> > pidmap_t; // span -> phrase ID
-    typedef boost::unordered_map<uint64_t,vector<span> > pid2span_t;
-    typedef pstats::trg_map_t jStatsTable;
+//   class Alignment
+//   {
+//     typedef L2R_Token<SimpleWordId> Token;
+//     typedef TSA<Token>           tsa;
+//     typedef pair<uint32_t, uint32_t>  span;
+//     typedef vector<vector<uint64_t> > pidmap_t; // span -> phrase ID
+//     typedef boost::unordered_map<uint64_t,vector<span> > pid2span_t;
+//     typedef pstats::trg_map_t jStatsTable;
 
-    Mmsapt const& PT;
-    vector<id_type> s,t; 
-    pidmap_t   sspan2pid, tspan2pid; // span -> phrase ID
-    pid2span_t spid2span,tpid2span;
-    vector<vector<sptr<pstats> > > spstats;
+//     Mmsapt const& PT;
+//     vector<id_type> s,t; 
+//     pidmap_t   sspan2pid, tspan2pid; // span -> phrase ID
+//     pid2span_t spid2span,tpid2span;
+//     vector<vector<sptr<pstats> > > spstats;
 
-    vector<PhrasePair> PP; 
-    // position-independent phrase pair info
-  public:
-    vector<PhraseAlnHyp> PAH;  
-    vector<vector<int> > tpos2ahyp;
-    // maps from target start positions to PhraseAlnHyps starting at
-    // that position
+//     vector<PhrasePair> PP; 
+//     // position-independent phrase pair info
+//   public:
+//     vector<PhraseAlnHyp> PAH;  
+//     vector<vector<int> > tpos2ahyp;
+//     // maps from target start positions to PhraseAlnHyps starting at
+//     // that position
 
-    sptr<pstats> getPstats(span const& sspan);
-    void fill_tspan_maps();
-    void fill_sspan_maps();
-  public:
-    Alignment(Mmsapt const& pt, string const& src, string const& trg);
-    void show(ostream& out); 
-    void show(ostream& out, PhraseAlnHyp const& ah); 
-  };
+//     sptr<pstats> getPstats(span const& sspan);
+//     void fill_tspan_maps();
+//     void fill_sspan_maps();
+//   public:
+//     Alignment(Mmsapt const& pt, string const& src, string const& trg);
+//     void show(ostream& out); 
+//     void show(ostream& out, PhraseAlnHyp const& ah); 
+//   };
 
-  void
-  Alignment::
-  show(ostream& out, PhraseAlnHyp const& ah)
-  {
-#if 0
-    LexicalPhraseScorer2<Token>::table_t const& 
-      COOCjnt = PT.calc_lex.scorer.COOC;
+//   void
+//   Alignment::
+//   show(ostream& out, PhraseAlnHyp const& ah)
+//   {
+// #if 0
+//     LexicalPhraseScorer2<Token>::table_t const& 
+//       COOCjnt = PT.calc_lex.scorer.COOC;
 
-    out << setw(10) << exp(ah.score) << " "
-	<< PT.btfix.T2->pid2str(PT.btfix.V2.get(), ah.pp.p2) 
-	<< " <=> "
-	<< PT.btfix.T1->pid2str(PT.btfix.V1.get(), ah.pp.p1);
-    vector<uchar> const& a = ah.pp.aln;
-    // BOOST_FOREACH(int x,a) cout << "[" << x << "] ";
-    for (size_t u = 0; u+1 < a.size(); u += 2)
-      out << " " << int(a[u+1]) << "-" << int(a[u]);
+//     out << setw(10) << exp(ah.score) << " "
+// 	<< PT.btfix.T2->pid2str(PT.btfix.V2.get(), ah.pp.p2) 
+// 	<< " <=> "
+// 	<< PT.btfix.T1->pid2str(PT.btfix.V1.get(), ah.pp.p1);
+//     vector<uchar> const& a = ah.pp.aln;
+//     // BOOST_FOREACH(int x,a) cout << "[" << x << "] ";
+//     for (size_t u = 0; u+1 < a.size(); u += 2)
+//       out << " " << int(a[u+1]) << "-" << int(a[u]);
 
-    if (ah.e2-ah.s2 == 1 and ah.e1-ah.s1 == 1)
-      out << " " << COOCjnt[s[ah.s1]][t[ah.s2]]
-	  << "/" << PT.COOCraw[s[ah.s1]][t[ah.s2]]
-	  << "=" << float(COOCjnt[s[ah.s1]][t[ah.s2]])/PT.COOCraw[s[ah.s1]][t[ah.s2]];
-    out << endl;
-    // float const* ofwdj = ah.pp.dfwd;
-    // float const* obwdj = ah.pp.dbwd;
-    // uint32_t const* ofwdm = spstats[ah.s1][ah.e1-ah.s1-1]->ofwd;
-    // uint32_t const* obwdm = spstats[ah.s1][ah.e1-ah.s1-1]->obwd;
-    // out << "   [first: " << ofwdj[po_first]<<"/"<<ofwdm[po_first]
-    // 	 <<     " last: " << ofwdj[po_last]<<"/"<<ofwdm[po_last]
-    // 	 <<     " mono: " << ofwdj[po_mono]<<"/"<<ofwdm[po_mono]
-    // 	 <<     " jfwd: " << ofwdj[po_jfwd]<<"/"<<ofwdm[po_jfwd]
-    // 	 <<     " swap: " << ofwdj[po_swap]<<"/"<<ofwdm[po_swap]
-    // 	 <<     " jbwd: " << ofwdj[po_jbwd]<<"/"<<ofwdm[po_jbwd]
-    // 	 <<     " other: " << ofwdj[po_other]<<"/"<<ofwdm[po_other]
-    // 	 << "]" << endl
-    // 	 << "   [first: " << obwdj[po_first]<<"/"<<obwdm[po_first]
-    // 	 <<     " last: " << obwdj[po_last]<<"/"<<obwdm[po_last]
-    // 	 <<     " mono: " << obwdj[po_mono]<<"/"<<obwdm[po_mono]
-    // 	 <<     " jfwd: " << obwdj[po_jfwd]<<"/"<<obwdm[po_jfwd]
-    // 	 <<     " swap: " << obwdj[po_swap]<<"/"<<obwdm[po_swap]
-    // 	 <<     " jbwd: " << obwdj[po_jbwd]<<"/"<<obwdm[po_jbwd]
-    // 	 <<     " other: " << obwdj[po_other]<<"/"<<obwdm[po_other]
-    // 	 << "]" << endl;
-#endif
-  }
+//     if (ah.e2-ah.s2 == 1 and ah.e1-ah.s1 == 1)
+//       out << " " << COOCjnt[s[ah.s1]][t[ah.s2]]
+// 	  << "/" << PT.COOCraw[s[ah.s1]][t[ah.s2]]
+// 	  << "=" << float(COOCjnt[s[ah.s1]][t[ah.s2]])/PT.COOCraw[s[ah.s1]][t[ah.s2]];
+//     out << endl;
+//     // float const* ofwdj = ah.pp.dfwd;
+//     // float const* obwdj = ah.pp.dbwd;
+//     // uint32_t const* ofwdm = spstats[ah.s1][ah.e1-ah.s1-1]->ofwd;
+//     // uint32_t const* obwdm = spstats[ah.s1][ah.e1-ah.s1-1]->obwd;
+//     // out << "   [first: " << ofwdj[po_first]<<"/"<<ofwdm[po_first]
+//     // 	 <<     " last: " << ofwdj[po_last]<<"/"<<ofwdm[po_last]
+//     // 	 <<     " mono: " << ofwdj[po_mono]<<"/"<<ofwdm[po_mono]
+//     // 	 <<     " jfwd: " << ofwdj[po_jfwd]<<"/"<<ofwdm[po_jfwd]
+//     // 	 <<     " swap: " << ofwdj[po_swap]<<"/"<<ofwdm[po_swap]
+//     // 	 <<     " jbwd: " << ofwdj[po_jbwd]<<"/"<<ofwdm[po_jbwd]
+//     // 	 <<     " other: " << ofwdj[po_other]<<"/"<<ofwdm[po_other]
+//     // 	 << "]" << endl
+//     // 	 << "   [first: " << obwdj[po_first]<<"/"<<obwdm[po_first]
+//     // 	 <<     " last: " << obwdj[po_last]<<"/"<<obwdm[po_last]
+//     // 	 <<     " mono: " << obwdj[po_mono]<<"/"<<obwdm[po_mono]
+//     // 	 <<     " jfwd: " << obwdj[po_jfwd]<<"/"<<obwdm[po_jfwd]
+//     // 	 <<     " swap: " << obwdj[po_swap]<<"/"<<obwdm[po_swap]
+//     // 	 <<     " jbwd: " << obwdj[po_jbwd]<<"/"<<obwdm[po_jbwd]
+//     // 	 <<     " other: " << obwdj[po_other]<<"/"<<obwdm[po_other]
+//     // 	 << "]" << endl;
+// #endif
+//   }
   
-  void
-  Alignment::
-  show(ostream& out)
-  {
-    // show what we have so far ...
-    for (size_t s2 = 0; s2 < t.size(); ++s2)
-      {
-	VectorIndexSorter<PhraseAlnHyp> foo(PAH);
-	sort(tpos2ahyp[s2].begin(), tpos2ahyp[s2].end(), foo);
-	for (size_t h = 0; h < tpos2ahyp[s2].size(); ++h)
-	  show(out,PAH[tpos2ahyp[s2][h]]);
-      }
-  }
+//   void
+//   Alignment::
+//   show(ostream& out)
+//   {
+//     // show what we have so far ...
+//     for (size_t s2 = 0; s2 < t.size(); ++s2)
+//       {
+// 	VectorIndexSorter<PhraseAlnHyp> foo(PAH);
+// 	sort(tpos2ahyp[s2].begin(), tpos2ahyp[s2].end(), foo);
+// 	for (size_t h = 0; h < tpos2ahyp[s2].size(); ++h)
+// 	  show(out,PAH[tpos2ahyp[s2][h]]);
+//       }
+//   }
 
-  sptr<pstats>
-  Alignment::
-  getPstats(span const& sspan)
-  {
-    size_t k = sspan.second - sspan.first - 1;
-    if (k < spstats[sspan.first].size())
-      return spstats[sspan.first][k];
-    else return sptr<pstats>();
-  }
+//   sptr<pstats>
+//   Alignment::
+//   getPstats(span const& sspan)
+//   {
+//     size_t k = sspan.second - sspan.first - 1;
+//     if (k < spstats[sspan.first].size())
+//       return spstats[sspan.first][k];
+//     else return sptr<pstats>();
+//   }
   
-  void
-  Alignment::
-  fill_tspan_maps()
-  {
-    tspan2pid.assign(t.size(),vector<uint64_t>(t.size(),0));
-    for (size_t i = 0; i < t.size(); ++i)
-      {
-	tsa::tree_iterator m(PT.btfix.I2.get());
-	for (size_t k = i; k < t.size() && m.extend(t[k]); ++k)
-	  {
-	    uint64_t pid = m.getPid();
-	    tpid2span[pid].push_back(pair<uint32_t,uint32_t>(i,k+1));
-	    tspan2pid[i][k] = pid;
-	  }
-      } 
-  }
+//   void
+//   Alignment::
+//   fill_tspan_maps()
+//   {
+//     tspan2pid.assign(t.size(),vector<uint64_t>(t.size(),0));
+//     for (size_t i = 0; i < t.size(); ++i)
+//       {
+// 	tsa::tree_iterator m(PT.btfix.I2.get());
+// 	for (size_t k = i; k < t.size() && m.extend(t[k]); ++k)
+// 	  {
+// 	    uint64_t pid = m.getPid();
+// 	    tpid2span[pid].push_back(pair<uint32_t,uint32_t>(i,k+1));
+// 	    tspan2pid[i][k] = pid;
+// 	  }
+//       } 
+//   }
 
-  void
-  Alignment::
-  fill_sspan_maps()
-  {
-    sspan2pid.assign(s.size(),vector<uint64_t>(s.size(),0));
-    spstats.resize(s.size());
-    for (size_t i = 0; i < s.size(); ++i)
-      {
-	tsa::tree_iterator m(PT.btfix.I1.get());
-	for (size_t k = i; k < s.size() && m.extend(s[k]); ++k)
-	  {
-	    uint64_t pid = m.getPid();
-	    sspan2pid[i][k] = pid;
-	    pid2span_t::iterator p = spid2span.find(pid);
-	    if (p != spid2span.end())
-	      {
-		int x = p->second[0].first;
-		int y = p->second[0].second-1;
-		spstats[i].push_back(spstats[x][y-x]);
-	      }
-	    else 
-	      {
-		spstats[i].push_back(PT.btfix.lookup(m));
-		cout << PT.btfix.T1->pid2str(PT.btfix.V1.get(),pid) << " "
-		     << spstats[i].back()->good << "/" << spstats[i].back()->sample_cnt 
-		     << endl;
-	      }
-	    spid2span[pid].push_back(pair<uint32_t,uint32_t>(i,k+1));
-	  }
-      }
-  }
+//   void
+//   Alignment::
+//   fill_sspan_maps()
+//   {
+//     sspan2pid.assign(s.size(),vector<uint64_t>(s.size(),0));
+//     spstats.resize(s.size());
+//     for (size_t i = 0; i < s.size(); ++i)
+//       {
+// 	tsa::tree_iterator m(PT.btfix.I1.get());
+// 	for (size_t k = i; k < s.size() && m.extend(s[k]); ++k)
+// 	  {
+// 	    uint64_t pid = m.getPid();
+// 	    sspan2pid[i][k] = pid;
+// 	    pid2span_t::iterator p = spid2span.find(pid);
+// 	    if (p != spid2span.end())
+// 	      {
+// 		int x = p->second[0].first;
+// 		int y = p->second[0].second-1;
+// 		spstats[i].push_back(spstats[x][y-x]);
+// 	      }
+// 	    else 
+// 	      {
+// 		spstats[i].push_back(PT.btfix.lookup(m));
+// 		cout << PT.btfix.T1->pid2str(PT.btfix.V1.get(),pid) << " "
+// 		     << spstats[i].back()->good << "/" << spstats[i].back()->sample_cnt 
+// 		     << endl;
+// 	      }
+// 	    spid2span[pid].push_back(pair<uint32_t,uint32_t>(i,k+1));
+// 	  }
+//       }
+//   }
 
-  Alignment::
-  Alignment(Mmsapt const& pt, string const& src, string const& trg)
-    : PT(pt)
-  {
-    PT.btfix.V1->fillIdSeq(src,s);
-    PT.btfix.V2->fillIdSeq(trg,t);
+//   Alignment::
+//   Alignment(Mmsapt const& pt, string const& src, string const& trg)
+//     : PT(pt)
+//   {
+//     PT.btfix.V1->fillIdSeq(src,s);
+//     PT.btfix.V2->fillIdSeq(trg,t);
 
-    // LexicalPhraseScorer2<Token>::table_t const& COOC = PT.calc_lex.scorer.COOC;
-    // BOOST_FOREACH(id_type i, t)
-    //   {
-    // 	cout << (*PT.btfix.V2)[i];
-    // 	if (i < PT.wlex21.size())
-    // 	  {
-    // 	    BOOST_FOREACH(id_type k, PT.wlex21[i])
-    // 	      {
-    // 		size_t  j = COOC[k][i];
-    // 		size_t m1 = COOC.m1(k);
-    // 		size_t m2 = COOC.m2(i);
-    // 		if (j*1000 > m1 && j*1000 > m2)
-    // 		  cout << " " << (*PT.btfix.V1)[k];
-    // 	      }	 
-    // 	  }
-    // 	cout << endl;
-    //   }
+//     // LexicalPhraseScorer2<Token>::table_t const& COOC = PT.calc_lex.scorer.COOC;
+//     // BOOST_FOREACH(id_type i, t)
+//     //   {
+//     // 	cout << (*PT.btfix.V2)[i];
+//     // 	if (i < PT.wlex21.size())
+//     // 	  {
+//     // 	    BOOST_FOREACH(id_type k, PT.wlex21[i])
+//     // 	      {
+//     // 		size_t  j = COOC[k][i];
+//     // 		size_t m1 = COOC.m1(k);
+//     // 		size_t m2 = COOC.m2(i);
+//     // 		if (j*1000 > m1 && j*1000 > m2)
+//     // 		  cout << " " << (*PT.btfix.V1)[k];
+//     // 	      }	 
+//     // 	  }
+//     // 	cout << endl;
+//     //   }
     
-    fill_tspan_maps();
-    fill_sspan_maps();
-    tpos2ahyp.resize(t.size()); 
-    // now fill the association score table
-    PAH.reserve(1000000);
-    typedef pid2span_t::iterator psiter;
-    for (psiter L = spid2span.begin(); L != spid2span.end(); ++L)
-      {
-	if (!L->second.size()) continue; // should never happen anyway
-	int i = L->second[0].first;
-	int k = L->second[0].second - i -1;
-	sptr<pstats> ps = spstats[i][k];
-	PhrasePair pp; pp.init(L->first,*ps, PT.m_numScoreComponents);
-	jStatsTable & J = ps->trg;
-	for (jStatsTable::iterator y = J.begin(); y != J.end(); ++y)
-	  {
-	    psiter R = tpid2span.find(y->first);
-	    if (R == tpid2span.end()) continue;
-	    pp.update(y->first, y->second);
-	    PT.ScorePPfix(pp);
-	    pp.eval(PT.feature_weights);
-	    PP.push_back(pp);
-	    BOOST_FOREACH(span const& sspan, L->second)
-	      {
-		BOOST_FOREACH(span const& tspan, R->second)
-		  {
-		    tpos2ahyp[tspan.first].push_back(PAH.size());
-		    PAH.push_back(PhraseAlnHyp(PP.back(),s.size(),sspan,tspan));
-		  }
-	      }
-	  }
-      }
-  }
+//     fill_tspan_maps();
+//     fill_sspan_maps();
+//     tpos2ahyp.resize(t.size()); 
+//     // now fill the association score table
+//     PAH.reserve(1000000);
+//     typedef pid2span_t::iterator psiter;
+//     for (psiter L = spid2span.begin(); L != spid2span.end(); ++L)
+//       {
+// 	if (!L->second.size()) continue; // should never happen anyway
+// 	int i = L->second[0].first;
+// 	int k = L->second[0].second - i -1;
+// 	sptr<pstats> ps = spstats[i][k];
+// 	PhrasePair pp; pp.init(L->first,*ps, PT.m_numScoreComponents);
+// 	jStatsTable & J = ps->trg;
+// 	for (jStatsTable::iterator y = J.begin(); y != J.end(); ++y)
+// 	  {
+// 	    psiter R = tpid2span.find(y->first);
+// 	    if (R == tpid2span.end()) continue;
+// 	    pp.update(y->first, y->second);
+// 	    PT.ScorePPfix(pp);
+// 	    pp.eval(PT.feature_weights);
+// 	    PP.push_back(pp);
+// 	    BOOST_FOREACH(span const& sspan, L->second)
+// 	      {
+// 		BOOST_FOREACH(span const& tspan, R->second)
+// 		  {
+// 		    tpos2ahyp[tspan.first].push_back(PAH.size());
+// 		    PAH.push_back(PhraseAlnHyp(PP.back(),s.size(),sspan,tspan));
+// 		  }
+// 	      }
+// 	  }
+//       }
+//   }
 
     
 
-  int
-  extend(vector<PhraseAlnHyp> & PAH, int edge, int next)
-  {
-    if ((PAH[edge].scov & PAH[next].scov).count()) 
-      return -1;
-    int ret = PAH.size();
-    PAH.push_back(PAH[next]);
-    PhraseAlnHyp & h = PAH.back();
-    h.prev  = edge;
-    h.scov |= PAH[edge].scov;
-    h.score += log(PAH[edge].dprob_fwd(PAH[next]));
-    h.score += log(PAH[next].dprob_bwd(PAH[edge]));
-    return ret;
-  }
+//   int
+//   extend(vector<PhraseAlnHyp> & PAH, int edge, int next)
+//   {
+//     if ((PAH[edge].scov & PAH[next].scov).count()) 
+//       return -1;
+//     int ret = PAH.size();
+//     PAH.push_back(PAH[next]);
+//     PhraseAlnHyp & h = PAH.back();
+//     h.prev  = edge;
+//     h.scov |= PAH[edge].scov;
+//     h.score += log(PAH[edge].dprob_fwd(PAH[next]));
+//     h.score += log(PAH[next].dprob_bwd(PAH[edge]));
+//     return ret;
+//   }
 
-  sptr<vector<int> >
-  Mmsapt::
-  align(string const& src, string const& trg) const
-  {
-    // For the time being, we consult only the fixed bitext.
-    // We might also consider the dynamic bitext. => TO DO.
-    Alignment A(*this,src,trg);
-    VectorIndexSorter<PhraseAlnHyp> foo(A.PAH);
-    vector<size_t> o; foo.GetOrder(o);
-    BOOST_FOREACH(int i, o) A.show(cout,A.PAH[i]);
-    sptr<vector<int> > aln;
-    return aln;
-}
-}
+//   sptr<vector<int> >
+//   Mmsapt::
+//   align(string const& src, string const& trg) const
+//   {
+//     // For the time being, we consult only the fixed bitext.
+//     // We might also consider the dynamic bitext. => TO DO.
+//     Alignment A(*this,src,trg);
+//     VectorIndexSorter<PhraseAlnHyp> foo(A.PAH);
+//     vector<size_t> o; foo.GetOrder(o);
+//     BOOST_FOREACH(int i, o) A.show(cout,A.PAH[i]);
+//     sptr<vector<int> > aln;
+//     return aln;
+// }
+// }
 
 
diff --git a/moses/TranslationModel/UG/mmsapt_phrase_scorers.h b/moses/TranslationModel/UG/mmsapt_phrase_scorers.h
index 6e852b44b5..083afb3a32 100644
--- a/moses/TranslationModel/UG/mmsapt_phrase_scorers.h
+++ b/moses/TranslationModel/UG/mmsapt_phrase_scorers.h
@@ -1,268 +1,17 @@
 // -*- c++ -*-
+// written by Ulrich Germann 
 #pragma once
 #include "moses/TranslationModel/UG/mm/ug_bitext.h"
 #include "util/exception.hh"
+#include "boost/format.hpp"
+#include "sapt_pscore_base.h"
+
+// DEPRECATED CODE: Word and phrase penalties are now 
+// added by the decoder.
 
 namespace Moses {
   namespace bitext
   {
-
-    template<typename Token>
-    class
-    PhraseScorer
-    {
-    protected:
-      int m_index;
-      int m_num_feats;
-      vector<string> m_feature_names;
-    public:
- 
-      virtual 
-      void 
-      operator()(Bitext<Token> const& pt, PhrasePair& pp, vector<float> * dest=NULL) 
-	const = 0;
-    
-      int 
-      fcnt() const 
-      { return m_num_feats; }
-    
-      vector<string> const &
-      fnames() const
-      { return m_feature_names; }
-
-      string const &
-      fname(int i) const
-      { 
-	UTIL_THROW_IF2((i < m_index || i >= m_index + m_num_feats),
-		       "Feature name index out of range at " 
-		       << __FILE__ << ":" << __LINE__);
-	return m_feature_names.at(i - m_index); 
-      }
-    
-      int 
-      getIndex() const 
-      { return m_index; }
-    };
-  
-    ////////////////////////////////////////////////////////////////////////////////
-  
-    template<typename Token>
-    class
-    PScorePfwd : public PhraseScorer<Token>
-    {
-      float conf;
-      char denom;
-    public:
-      PScorePfwd() 
-      {
-	this->m_num_feats = 1;
-      }
-
-      int 
-      init(int const i, float const c, char d) 
-      { 
-	conf  = c; 
-	denom = d;
-	this->m_index = i;
-	ostringstream buf;
-	buf << format("pfwd-%c%.3f") % denom % c;
-	this->m_feature_names.push_back(buf.str());
-	return i + this->m_num_feats;
-      }
-
-      void 
-      operator()(Bitext<Token> const& bt, PhrasePair & pp, 
-		 vector<float> * dest = NULL) const
-      {
-	if (!dest) dest = &pp.fvals;
-	if (pp.joint > pp.good1) 
-	  {
-	    cerr<<bt.toString(pp.p1,0)<<" ::: "<<bt.toString(pp.p2,1)<<endl;
-	    cerr<<pp.joint<<"/"<<pp.good1<<"/"<<pp.raw2<<endl;
-	  }
-	switch (denom)
-	  {
-	  case 'g': 
-	    (*dest)[this->m_index] = log(lbop(pp.good1, pp.joint, conf)); 
-	    break;
-	  case 's': 
-	    (*dest)[this->m_index] = log(lbop(pp.sample1, pp.joint, conf)); 
-	    break;
-	  case 'r':
-	    (*dest)[this->m_index] = log(lbop(pp.raw1, pp.joint, conf)); 
-	  }
-      }
-    };
-  
-    ////////////////////////////////////////////////////////////////////////////////
-
-    template<typename Token>
-    class
-    PScorePbwd : public PhraseScorer<Token>
-    {
-      float conf;
-      char denom;
-    public:
-      PScorePbwd() 
-      {
-	this->m_num_feats = 1;
-      }
-
-      int 
-      init(int const i, float const c, char d) 
-      { 
-	conf = c; 
-	denom = d;
-	this->m_index = i;
-	ostringstream buf;
-	buf << format("pbwd-%c%.3f") % denom % c;
-	this->m_feature_names.push_back(buf.str());
-	return i + this->m_num_feats;
-      }
-
-      void 
-      operator()(Bitext<Token> const& bt, PhrasePair& pp, 
-		 vector<float> * dest = NULL) const
-      {
-	if (!dest) dest = &pp.fvals;
-	// we use the denominator specification to scale the raw counts on the 
-	// target side; the clean way would be to counter-sample
-	uint32_t r2 = pp.raw2;
-	if      (denom == 'g') r2 = round(r2 * float(pp.good1)   / pp.raw1);
-	else if (denom == 's') r2 = round(r2 * float(pp.sample1) / pp.raw1);
-	(*dest)[this->m_index] = log(lbop(max(r2, pp.joint),pp.joint,conf));
-      }
-    };
-  
-    ////////////////////////////////////////////////////////////////////////////////
-
-    template<typename Token>
-    class
-    PScoreCoherence : public PhraseScorer<Token>
-    {
-    public:
-      PScoreCoherence() 
-      {
-	this->m_num_feats = 1;
-      }
-    
-      int 
-      init(int const i) 
-      { 
-	this->m_index = i;
-	this->m_feature_names.push_back(string("coherence"));
-	return i + this->m_num_feats;
-      }
-
-      void 
-      operator()(Bitext<Token> const& bt, PhrasePair& pp, 
-		 vector<float> * dest = NULL) const
-      {
-	if (!dest) dest = &pp.fvals;
-	(*dest)[this->m_index] = log(pp.good1) - log(pp.sample1);
-      }
-    };
-  
-    ////////////////////////////////////////////////////////////////////////////////
-
-    template<typename Token>
-    class
-    PScoreLogCounts : public PhraseScorer<Token>
-    {
-      float conf;
-    public:
-      PScoreLogCounts() 
-      {
-	this->m_num_feats = 5;
-      }
-    
-      int 
-      init(int const i) 
-      { 
-	this->m_index = i;
-	this->m_feature_names.push_back("log-r1");
-	this->m_feature_names.push_back("log-s1");
-	this->m_feature_names.push_back("log-g1");
-	this->m_feature_names.push_back("log-j");
-	this->m_feature_names.push_back("log-r2");
-	return i + this->m_num_feats;
-      }
-    
-      void 
-      operator()(Bitext<Token> const& bt, PhrasePair& pp, 
-		 vector<float> * dest = NULL) const
-      {
-	if (!dest) dest = &pp.fvals;
-	size_t i = this->m_index;
-	assert(pp.raw1);
-	assert(pp.sample1);
-	assert(pp.good1);
-	assert(pp.joint);
-	assert(pp.raw2);
-	(*dest)[i]   = -log(pp.raw1);
-	(*dest)[++i] = -log(pp.sample1);
-	(*dest)[++i] = -log(pp.good1);
-	(*dest)[++i] = +log(pp.joint);
-	(*dest)[++i] = -log(pp.raw2);
-      }
-    };
-  
-    template<typename Token>
-    class
-    PScoreLex : public PhraseScorer<Token>
-    {
-      float const m_alpha;
-    public:
-      LexicalPhraseScorer2<Token> scorer;
-    
-      PScoreLex(float const a) 
-	: m_alpha(a) 
-      { this->m_num_feats = 2; }
-    
-      int 
-      init(int const i, string const& fname) 
-      { 
-	scorer.open(fname); 
-	this->m_index = i;
-	this->m_feature_names.push_back("lexfwd");
-	this->m_feature_names.push_back("lexbwd");
-	return i + this->m_num_feats;
-      }
-    
-      void 
-      operator()(Bitext<Token> const& bt, PhrasePair& pp, vector<float> * dest = NULL) const
-      {
-	if (!dest) dest = &pp.fvals;
-	uint32_t sid1=0,sid2=0,off1=0,off2=0,len1=0,len2=0;
-	parse_pid(pp.p1, sid1, off1, len1);
-	parse_pid(pp.p2, sid2, off2, len2);
-	
-#if 0
-	cout << len1 << " " << len2 << endl;
-	Token const* t1 = bt.T1->sntStart(sid1);
-	for (size_t i = off1; i < off1 + len1; ++i)
-	  cout << (*bt.V1)[t1[i].id()] << " "; 
-	cout << __FILE__ << ":" << __LINE__ << endl;
-	
-	Token const* t2 = bt.T2->sntStart(sid2);
-	for (size_t i = off2; i < off2 + len2; ++i)
-	  cout << (*bt.V2)[t2[i].id()] << " "; 
-	cout << __FILE__ << ":" << __LINE__ << endl;
-	
-	BOOST_FOREACH (int a, pp.aln)
-	  cout << a << " " ;
-	cout << __FILE__ << ":" << __LINE__ << "\n" << endl;
-	
-#endif
-	scorer.score(bt.T1->sntStart(sid1)+off1,0,len1,
-		     bt.T2->sntStart(sid2)+off2,0,len2,
-		     pp.aln, m_alpha,
-		     (*dest)[this->m_index],
-		     (*dest)[this->m_index+1]);
-      }
-      
-    };
-  
     /// Word penalty
     template<typename Token>
     class
@@ -280,7 +29,8 @@ namespace Moses {
       }
     
       void 
-      operator()(Bitext<Token> const& bt, PhrasePair& pp, vector<float> * dest = NULL) const
+      operator()(Bitext<Token> const& bt, PhrasePair<Token>& pp, 
+		 vector<float> * dest = NULL) const
       {
 	if (!dest) dest = &pp.fvals;
 	uint32_t sid2=0,off2=0,len2=0;
@@ -307,7 +57,8 @@ namespace Moses {
       }
     
       void 
-      operator()(Bitext<Token> const& bt, PhrasePair& pp, vector<float> * dest = NULL) const
+      operator()(Bitext<Token> const& bt, PhrasePair<Token>& pp, 
+		 vector<float> * dest = NULL) const
       {
 	if (!dest) dest = &pp.fvals;
 	(*dest)[this->m_index] = 1;
diff --git a/moses/TranslationModel/UG/ptable-lookup.cc b/moses/TranslationModel/UG/ptable-lookup.cc
index 106505f053..2cbf89b166 100644
--- a/moses/TranslationModel/UG/ptable-lookup.cc
+++ b/moses/TranslationModel/UG/ptable-lookup.cc
@@ -106,15 +106,11 @@ int main(int argc, char* argv[])
       	  cout << "   ";
       	  for (size_t k = idx.first; k < idx.second; ++k)
       	    {
-      	      if (mmsapt && fname[k-idx.first].substr(0,3) == "log")
-      		{
-      		  if(scores[k] < 0)
-      		    cout << " " << format("%10d") % round(exp(-scores[k]));
-      		  else
-      		    cout << " " << format("%10d") % round(exp(scores[k]));
-      		}
-      	      else
-      		cout << " " << format("%10.8f") % exp(scores[k]);
+	      size_t j = k-idx.first;
+	      float f = (mmsapt ? mmsapt->isLogVal(j) ? exp(scores[k]) : scores[k]
+			 : scores[k] < 0 ? exp(scores[k]) : scores[k]);
+	      string fmt = (mmsapt && mmsapt->isInteger(j)) ? "%10d" : "%10.8f";
+	      cout << " " << format(fmt) % f;
       	    }
       	  cout << endl;
       	}
diff --git a/moses/TranslationModel/UG/sapt_phrase_key.h b/moses/TranslationModel/UG/sapt_phrase_key.h
new file mode 100644
index 0000000000..e1ecf15739
--- /dev/null
+++ b/moses/TranslationModel/UG/sapt_phrase_key.h
@@ -0,0 +1,13 @@
+//-*- c++ -*-
+#pragma once
+#include <stdint.h>
+
+using namespace std; // NOTE(review): leaks into every includer; kept as is
+namespace Moses {} // ensure the namespace exists for the directive below
+namespace sapt
+{
+  using namespace Moses;
+  using namespace std;
+
+  // placeholder: no phrase key types are defined yet
+}
diff --git a/moses/TranslationModel/UG/sapt_phrase_scorers.h b/moses/TranslationModel/UG/sapt_phrase_scorers.h
new file mode 100644
index 0000000000..37cfd26fde
--- /dev/null
+++ b/moses/TranslationModel/UG/sapt_phrase_scorers.h
@@ -0,0 +1,12 @@
+// -*- c++ -*-
+// Phrase scoring functions for suffix array-based phrase tables
+// written by Ulrich Germann 
+#pragma once
+#include "sapt_pscore_unaligned.h"   // count # of unaligned words
+#include "sapt_pscore_provenance.h"  // reward for joint phrase occ. per corpus
+#include "sapt_pscore_rareness.h"    // penalty for rare occurrences (global?)
+#include "sapt_pscore_logcnt.h"      // logs of observed counts
+#include "sapt_pscore_lex1.h"        // plain vanilla Moses lexical scores
+#include "sapt_pscore_pfwd.h"        // fwd phrase prob
+#include "sapt_pscore_pbwd.h"        // bwd phrase prob
+#include "sapt_pscore_coherence.h"   // coherence feature: good/sample-size
diff --git a/moses/TranslationModel/UG/sapt_pscore_base.h b/moses/TranslationModel/UG/sapt_pscore_base.h
new file mode 100644
index 0000000000..68a491145c
--- /dev/null
+++ b/moses/TranslationModel/UG/sapt_pscore_base.h
@@ -0,0 +1,103 @@
+// -*- c++ -*-
+// Base classes for suffix array-based phrase scorers
+// written by Ulrich Germann 
+#pragma once
+#include "moses/TranslationModel/UG/mm/ug_bitext.h"
+#include "moses/TranslationModel/UG/mm/ug_phrasepair.h"
+#include "util/exception.hh"
+#include "boost/format.hpp"
+
+namespace Moses {
+  namespace bitext
+  {
+
+    // abstract base class that defines the common API for phrase scorers
+    template<typename Token>
+    class PhraseScorer
+    {
+    protected:
+      int m_index;     // first slot this scorer writes to; -1 = not assigned
+      int m_num_feats; // number of features this scorer provides
+      string m_tag;    // prefix from which scorer families build feature names
+      vector<string> m_feature_names;
+
+      // Gives both counters a defined value before any derived constructor
+      // runs; without this they are indeterminate until a subclass sets them.
+      PhraseScorer() : m_index(-1), m_num_feats(0) {}
+
+    public:
+      // Virtual, so that destroying a derived scorer through a pointer to
+      // PhraseScorer is well-defined.
+      virtual ~PhraseScorer() {}
+
+      // Scores the phrase pair pp. The implementations write to *dest, or
+      // to pp.fvals when dest is NULL.
+      virtual void
+      operator()(Bitext<Token> const& pt, PhrasePair<Token>& pp,
+		 vector<float> * dest=NULL) const = 0;
+
+      // position of this scorer's first feature in the score vector
+      void setIndex(int const i) { m_index = i; }
+      int  getIndex() const { return m_index; }
+
+      // number of features provided by this scorer
+      int fcnt() const { return m_num_feats; }
+
+      // names of all features of this scorer
+      vector<string> const& fnames() const { return m_feature_names; }
+
+      // Name of feature i; a negative i counts back from the last feature.
+      // Throws if the resulting index is out of range.
+      string const&
+      fname(int i) const
+      {
+	if (i < 0) i += m_num_feats;
+	UTIL_THROW_IF2(i < 0 || i >= m_num_feats,
+		       "Feature name index out of range at " << HERE);
+	return m_feature_names.at(i);
+      }
+
+      // is this feature log valued?
+      virtual bool isLogVal(int i) const { return true; }
+
+      // is this feature integer valued (e.g., count features)?
+      virtual bool isIntegerValued(int i) const { return false; }
+
+      // does this feature function allow pooling of counts if
+      // there are no occurrences in the respective corpus?
+      virtual bool allowPooling() const { return true; }
+
+    };
+
+    // base class for 'families' of phrase scorers that have a single real-valued parameter
+    template<typename Token>
+    class SingleRealValuedParameterPhraseScorerFamily
+      : public PhraseScorer<Token>
+    {
+    protected:
+      vector<float> m_x; // the parameter values, one per feature
+      // Reads the numbers in specs, each optionally followed by a single
+      // separator character, and registers one feature per number under
+      // the name "<m_tag>-<number>" (number printed with two decimals).
+      virtual void
+      init(string const specs)
+      {
+	using namespace boost;
+	UTIL_THROW_IF2(this->m_tag.size() == 0,
+		       "m_tag must be initialized in constructor");
+	UTIL_THROW_IF2(specs.size() == 0,"empty specification string!");
+	UTIL_THROW_IF2(this->m_feature_names.size(),
+		       "PhraseScorer can only be initialized once!");
+	this->m_index = -1;
+	istringstream in(specs);
+	for (float value; in >> value; )
+	  {
+	    this->m_x.push_back(value);
+	    this->m_feature_names.push_back((format("%s-%.2f") % this->m_tag % value).str());
+	    char separator; in >> separator; // consume the character after the number
+	  }
+	this->m_num_feats = this->m_x.size();
+      }
+    };
+  } // namespace bitext
+} // namespace moses
diff --git a/moses/TranslationModel/UG/sapt_pscore_coherence.h b/moses/TranslationModel/UG/sapt_pscore_coherence.h
new file mode 100644
index 0000000000..a3211df542
--- /dev/null
+++ b/moses/TranslationModel/UG/sapt_pscore_coherence.h
@@ -0,0 +1,33 @@
+// -*- c++ -*-
+// written by Ulrich Germann 
+#pragma once
+#include "moses/TranslationModel/UG/mm/ug_bitext.h"
+#include "util/exception.hh"
+#include "boost/format.hpp"
+
+namespace Moses {
+  namespace bitext
+  {
+    // Coherence feature: log(good1) - log(sample1) of a phrase pair.
+    template<typename Token>
+    class PScoreCoherence : public PhraseScorer<Token>
+    {
+    public:
+      // The argument is not used; this scorer takes no parameters.
+      PScoreCoherence(string const dummy)
+      {
+	this->m_feature_names.push_back(string("coherence"));
+	this->m_num_feats = 1;
+	this->m_index = -1;
+      }
+      // Stores the score in slot m_index of *dest (pp.fvals if dest is NULL).
+      void
+      operator()(Bitext<Token> const& bt, PhrasePair<Token>& pp,
+		 vector<float> * dest = NULL) const
+      {
+	vector<float>& out = dest ? *dest : pp.fvals;
+	out[this->m_index] = log(pp.good1) - log(pp.sample1);
+      }
+    };
+  }
+}
diff --git a/moses/TranslationModel/UG/sapt_pscore_lex1.h b/moses/TranslationModel/UG/sapt_pscore_lex1.h
new file mode 100644
index 0000000000..be994b0d38
--- /dev/null
+++ b/moses/TranslationModel/UG/sapt_pscore_lex1.h
@@ -0,0 +1,70 @@
+// -*- c++ -*-
+// Phrase scorer that provides the forward and backward lexical scores of a phrase pair
+// written by Ulrich Germann 
+
+#include "moses/TranslationModel/UG/mm/ug_bitext.h"
+#include "sapt_pscore_base.h"
+#include <boost/dynamic_bitset.hpp>
+
+namespace Moses {
+  namespace bitext
+  {
+    template<typename Token>
+    class
+    PScoreLex1 : public PhraseScorer<Token>
+    {
+      float m_alpha; // parsed from the constructor's first argument; handed to scorer.score()
+    public:
+      LexicalPhraseScorer2<Token> scorer;
+      // Registers the features "lexfwd" and "lexbwd" and opens lexfile with scorer.
+      PScoreLex1(string const& alpaspec, string const& lexfile) 
+      { 
+	this->m_index = -1;
+	this->m_num_feats = 2; 
+	this->m_feature_names.reserve(2);
+	this->m_feature_names.push_back("lexfwd");
+	this->m_feature_names.push_back("lexbwd");
+	m_alpha = atof(alpaspec.c_str());
+	scorer.open(lexfile); 
+      }
+      // Has scorer fill slots m_index and m_index+1 of *dest (pp.fvals if dest is NULL).
+      void 
+      operator()(Bitext<Token> const& bt, 
+		 PhrasePair<Token>& pp, 
+		 vector<float> * dest = NULL) const
+      {
+	if (!dest) dest = &pp.fvals;
+	// uint32_t sid1=0,sid2=0,off1=0,off2=0,len1=0,len2=0;
+	// parse_pid(pp.p1, sid1, off1, len1);
+	// parse_pid(pp.p2, sid2, off2, len2);
+#if 0 // disabled debugging output; needs the variables commented out above
+	cout << len1 << " " << len2 << endl;
+	Token const* t1 = bt.T1->sntStart(sid1);
+	for (size_t i = off1; i < off1 + len1; ++i)
+	  cout << (*bt.V1)[t1[i].id()] << " "; 
+	cout << __FILE__ << ":" << __LINE__ << endl;
+	
+	Token const* t2 = bt.T2->sntStart(sid2);
+	for (size_t i = off2; i < off2 + len2; ++i)
+	  cout << (*bt.V2)[t2[i].id()] << " "; 
+	cout << __FILE__ << ":" << __LINE__ << endl;
+	
+	BOOST_FOREACH (int a, pp.aln)
+	  cout << a << " " ;
+	cout << __FILE__ << ":" << __LINE__ << "\n" << endl;
+	
+	scorer.score(bt.T1->sntStart(sid1)+off1,0,len1,
+		     bt.T2->sntStart(sid2)+off2,0,len2,
+		     pp.aln, m_alpha,
+		     (*dest)[this->m_index],
+		     (*dest)[this->m_index+1]);
+#endif
+	scorer.score(pp.start1,0, pp.len1, 
+		     pp.start2,0, pp.len2, pp.aln, m_alpha, 
+		     (*dest)[this->m_index], 
+		     (*dest)[this->m_index+1]);
+      }
+    };
+  } //namespace bitext
+} // namespace Moses
+
diff --git a/moses/TranslationModel/UG/sapt_pscore_logcnt.h b/moses/TranslationModel/UG/sapt_pscore_logcnt.h
new file mode 100644
index 0000000000..2790323ed0
--- /dev/null
+++ b/moses/TranslationModel/UG/sapt_pscore_logcnt.h
@@ -0,0 +1,65 @@
+// -*- c++ -*-
+// Phrase scorer that provides the logs of the observed counts of a phrase
+// pair (raw, sample, good, joint) as features; the specification string
+// selects which of these counts are included
+// written by Ulrich Germann 
+
+#include "sapt_pscore_base.h"
+#include <boost/dynamic_bitset.hpp>
+
+using namespace std;
+namespace Moses {
+  namespace bitext  {
+    
+    template<typename Token>
+    class PScoreLogCnt : public PhraseScorer<Token>
+    {
+      string m_specs; // selects the counts to emit: "r1", "s1", "g1", "j", "r2"
+    public:
+      // Registers one "log-<count>" feature for every count named in specs.
+      PScoreLogCnt(string const specs)
+      {
+	this->m_index = -1;
+	this->m_specs = specs;
+	if (specs.find("r1") != string::npos) // raw source phrase counts
+	  this->m_feature_names.push_back("log-r1");
+	if (specs.find("s1") != string::npos) // L1 sample size
+	  this->m_feature_names.push_back("log-s1");
+	if (specs.find("g1") != string::npos) // coherent phrases
+	  this->m_feature_names.push_back("log-g1");
+	if (specs.find("j") != string::npos) // joint counts
+	  this->m_feature_names.push_back("log-j");
+	if (specs.find("r2") != string::npos) // raw target phrase counts
+	  this->m_feature_names.push_back("log-r2");
+	this->m_num_feats = this->m_feature_names.size();
+      }
+
+      bool isIntegerValued(int i) const { return true; }
+
+      // Writes the logs of the selected counts to consecutive slots of *dest
+      // (pp.fvals if dest is NULL), starting at m_index, in the order above.
+      void
+      operator()(Bitext<Token> const& bt, PhrasePair<Token>& pp,
+		 vector<float> * dest = NULL) const
+      {
+	if (!dest) dest = &pp.fvals;
+	assert(pp.raw1);
+	assert(pp.sample1);
+	assert(pp.good1);
+	assert(pp.joint);
+	assert(pp.raw2);
+	size_t i = this->m_index;
+	if (m_specs.find("r1") != string::npos)
+	  (*dest)[i++] = log(pp.raw1);
+	if (m_specs.find("s1") != string::npos)
+	  (*dest)[i++] = log(pp.sample1);
+	if (m_specs.find("g1") != string::npos)
+	  (*dest)[i++] = log(pp.good1);
+	if (m_specs.find("j") != string::npos)
+	  (*dest)[i++] = log(pp.joint);
+	if (m_specs.find("r2") != string::npos) // i++, not ++i: don't skip a slot
+	  (*dest)[i++] = log(pp.raw2);
+      }
+    };
+  } // namespace bitext
+} // namespace Moses
diff --git a/moses/TranslationModel/UG/sapt_pscore_pbwd.h b/moses/TranslationModel/UG/sapt_pscore_pbwd.h
new file mode 100644
index 0000000000..f7b4686d7c
--- /dev/null
+++ b/moses/TranslationModel/UG/sapt_pscore_pbwd.h
@@ -0,0 +1,58 @@
+//-*- c++ -*-
+// written by Ulrich Germann 
+#pragma once
+#include "moses/TranslationModel/UG/mm/ug_bitext.h"
+#include "util/exception.hh"
+#include "boost/format.hpp"
+#include "boost/foreach.hpp"
+
+namespace Moses {
+  namespace bitext
+  {
+    template<typename Token>
+    class PScorePbwd : public PhraseScorer<Token>
+    {
+      float   conf;
+      string denom; // 'g', 's', 'r' each name one feature; '+' names none
+    public:
+      // Registers "pbwd-<letter><c>" per letter; throws on other characters.
+      PScorePbwd(float const c, string d)
+      {
+	this->m_index = -1;
+	conf  = c;
+	denom = d;
+	size_t checksum = d.size();
+	BOOST_FOREACH(char const& x, denom)
+	  {
+	    if (x == '+') { --checksum; continue; }
+	    if (x != 'g' && x != 's' && x != 'r') continue;
+	    string s = (format("pbwd-%c%.3f") % x % c).str();
+	    this->m_feature_names.push_back(s);
+	  }
+	this->m_num_feats = this->m_feature_names.size();
+	UTIL_THROW_IF2(this->m_feature_names.size() != checksum,
+		       "Unknown parameter in specification '"
+		       << d << "' for Pbwd phrase scorer at " << HERE);
+      }
+
+      // Writes one score per feature to *dest (pp.fvals if NULL) from slot m_index on.
+      void
+      operator()(Bitext<Token> const& bt, PhrasePair<Token>& pp,
+		 vector<float> * dest = NULL) const
+      {
+	if (!dest) dest = &pp.fvals;
+	// we use the denominator specification to scale the raw counts on the
+	// target side; the clean way would be to counter-sample
+	size_t i = this->m_index;
+	BOOST_FOREACH(char const& x, denom)
+	  {
+	    if (x == '+') continue; // no feature, so it must not use up a slot
+	    uint32_t m2 = pp.raw2;
+	    if      (x == 'g') m2 = round(m2 * float(pp.good1)   / pp.raw1);
+	    else if (x == 's') m2 = round(m2 * float(pp.sample1) / pp.raw1);
+	    (*dest)[i++] = log(lbop(max(m2, pp.joint),pp.joint,conf));
+	  }
+      }
+    };
+  } // namespace bitext
+} // namespace Moses
diff --git a/moses/TranslationModel/UG/sapt_pscore_pfwd.h b/moses/TranslationModel/UG/sapt_pscore_pfwd.h
new file mode 100644
index 0000000000..ed48a93d24
--- /dev/null
+++ b/moses/TranslationModel/UG/sapt_pscore_pfwd.h
@@ -0,0 +1,70 @@
+// -*- c++ -*-
+// written by Ulrich Germann 
+#pragma once
+#include "moses/TranslationModel/UG/mm/ug_bitext.h"
+#include "util/exception.hh"
+#include "boost/format.hpp"
+#include "boost/foreach.hpp"
+
+namespace Moses {
+  namespace bitext
+  {
+    template<typename Token>
+    class PScorePfwd : public PhraseScorer<Token>
+    {
+      float   conf;
+      string denom;
+
+    public:
+      // Builds one feature named "pfwd-<letter><c>" for each 'g', 's' or
+      // 'r' in d. A '+' is accepted and yields no feature; any other
+      // character makes the constructor throw.
+      PScorePfwd(float const c, string d)
+      {
+	this->m_index = -1;
+	conf  = c;
+	denom = d;
+	size_t checksum = d.size(); // ends up as d.size() minus the '+' count
+	BOOST_FOREACH(char const& x, denom)
+	  {
+	    if (x == '+') --checksum;
+	    else if (x == 'g' || x == 's' || x == 'r')
+	      this->m_feature_names.push_back((format("pfwd-%c%.3f") % x % c).str());
+	  }
+	this->m_num_feats = this->m_feature_names.size();
+	UTIL_THROW_IF2(this->m_feature_names.size() != checksum,
+		       "Unknown parameter in specification '"
+		       << d << "' for Pfwd phrase scorer at " << HERE);
+      }
+
+      // Writes one score per 'g'/'s'/'r' in denom to consecutive slots of
+      // *dest (pp.fvals if dest is NULL), starting at m_index. Before that,
+      // pp.joint is capped at pp.good1, so pp itself may be modified by
+      // this call.
+      void
+      operator()(Bitext<Token> const& bt,
+		 PhrasePair<Token> & pp,
+		 vector<float> * dest = NULL) const
+      {
+	if (!dest) dest = &pp.fvals;
+
+	// cap the joint count at good1
+	if (pp.joint > pp.good1)
+	  pp.joint = pp.good1;
+
+	size_t slot = this->m_index;
+	BOOST_FOREACH(char const& c, this->denom)
+	  {
+	    // any other character leaves dest and slot untouched
+	    if (c == 'g')
+	      (*dest)[slot++] = log(lbop(pp.good1, pp.joint, conf));
+	    else if (c == 's')
+	      (*dest)[slot++] = log(lbop(pp.sample1, pp.joint, conf));
+	    else if (c == 'r')
+	      (*dest)[slot++] = log(lbop(pp.raw1, pp.joint, conf));
+	  }
+      }
+    };
+  }
+}
+  
diff --git a/moses/TranslationModel/UG/sapt_pscore_provenance.h b/moses/TranslationModel/UG/sapt_pscore_provenance.h
new file mode 100644
index 0000000000..c33b98fe79
--- /dev/null
+++ b/moses/TranslationModel/UG/sapt_pscore_provenance.h
@@ -0,0 +1,47 @@
+// -*- c++ -*-
+// Phrase scorer that rewards the number of phrase pair occurrences in a bitext
+// with the asymptotic function j/(j+x) where x > 0 is a function
+// parameter that determines the steepness of the rewards curve
+// written by Ulrich Germann 
+
+#include "sapt_pscore_base.h"
+#include <boost/dynamic_bitset.hpp>
+
+using namespace std;
+namespace Moses {
+  namespace bitext {
+    
+    // asymptotic provenance feature n/(n+x)
+    template<typename Token>
+    class PScoreProvenance
+      : public SingleRealValuedParameterPhraseScorerFamily<Token>
+    {
+    public:
+      // spec lists the parameter values x; init() turns each of them
+      // into one feature whose name starts with "prov-".
+      PScoreProvenance(string const& spec)
+      {
+	this->m_tag = "prov";
+	this->init(spec);
+      }
+
+      // the scores are plain ratios, not logs
+      bool isLogVal(int i) const { return false; }
+
+      bool allowPooling() const { return false; }
+
+      // For each parameter x, stores joint/(x + joint) in the next slot of
+      // *dest (pp.fvals if dest is NULL), starting at m_index; at() throws
+      // if a slot lies outside the vector.
+      void
+      operator()(Bitext<Token> const& bt, PhrasePair<Token>& pp,
+		 vector<float> * dest = NULL) const
+      {
+	vector<float>& out = dest ? *dest : pp.fvals;
+	size_t slot = this->m_index;
+	for (size_t k = 0; k < this->m_x.size(); ++k)
+	  out.at(slot++) = pp.joint/(this->m_x[k] + pp.joint);
+      }
+    };
+  } // namespace bitext
+} // namespace Moses
diff --git a/moses/TranslationModel/UG/sapt_pscore_rareness.h b/moses/TranslationModel/UG/sapt_pscore_rareness.h
new file mode 100644
index 0000000000..58f204c88b
--- /dev/null
+++ b/moses/TranslationModel/UG/sapt_pscore_rareness.h
@@ -0,0 +1,41 @@
+// -*- c++ -*-
+// Phrase scorer that penalizes rare phrase pairs in a bitext
+// with the asymptotic function x/(j+x) where x > 0 is a function
+// parameter that determines the steepness of the penalty curve
+// written by Ulrich Germann 
+
+#include "sapt_pscore_base.h"
+#include <boost/dynamic_bitset.hpp>
+
+using namespace std;
+namespace Moses {
+  namespace bitext  {
+    
+    // rareness penalty: x/(n+x)
+    template<typename Token>
+    class PScoreRareness
+      : public SingleRealValuedParameterPhraseScorerFamily<Token>
+    {
+    public:
+      // spec lists the parameter values x; each becomes one "rare-..." feature.
+      PScoreRareness(string const spec)
+      {
+	this->m_tag = "rare";
+	this->init(spec);
+      }
+
+      bool isLogVal(int i) const { return false; } // plain ratios, not logs
+
+      // For each x, stores x/(x + joint) in *dest (pp.fvals if NULL) from m_index on.
+      void
+      operator()(Bitext<Token> const& bt, PhrasePair<Token>& pp,
+		 vector<float> * dest = NULL) const
+      {
+	vector<float>& out = dest ? *dest : pp.fvals;
+	size_t slot = this->m_index;
+	for (size_t k = 0; k < this->m_x.size(); ++k)
+	  out.at(slot++) = this->m_x[k]/(this->m_x[k] + pp.joint);
+      }
+    };
+  } // namespace bitext
+} // namespace Moses
diff --git a/moses/TranslationModel/UG/sapt_pscore_unaligned.h b/moses/TranslationModel/UG/sapt_pscore_unaligned.h
new file mode 100644
index 0000000000..bdd2919b4f
--- /dev/null
+++ b/moses/TranslationModel/UG/sapt_pscore_unaligned.h
@@ -0,0 +1,67 @@
+// -*- c++ -*-
+// Phrase scorer that counts the number of unaligned words in the phrase
+// written by Ulrich Germann 
+
+#include "sapt_pscore_base.h"
+#include <boost/dynamic_bitset.hpp>
+
+namespace Moses {
+  namespace bitext
+  {
+    template<typename Token>
+    class PScoreUnaligned : public PhraseScorer<Token>
+    {
+      typedef boost::dynamic_bitset<uint64_t> bitvector;
+    public:
+      // spec (read with atoi) must give 1 ("unal") or 2 ("unal-s", "unal-t")
+      PScoreUnaligned(string const spec)
+      {
+	this->m_index = -1;
+	int f = this->m_num_feats = atoi(spec.c_str());
+	UTIL_THROW_IF2(f != 1 && f != 2,"unal parameter must be 1 or 2 at "<<HERE);
+	this->m_feature_names.resize(f);
+	if (f == 2)
+	  {
+	    this->m_feature_names[0] = "unal-s";
+	    this->m_feature_names[1] = "unal-t";
+	  }
+	else
+	  this->m_feature_names[0] = "unal";
+      }
+
+      // the values are plain counts, not logs ...
+      bool isLogVal(int i) const { return false; }
+
+      // ... and they are whole numbers
+      bool isIntegerValued(int i) const { return true; }
+
+      // Counts, per side, the positions that no alignment point refers to.
+      // pp.aln is read as a flat list of pairs: the first member of a pair
+      // marks a position in check1 (pp.len1 bits), the second in check2
+      // (pp.len2 bits). With one feature both counts are summed in slot
+      // m_index; with two they go to slots m_index and m_index+1.
+      void
+      operator()(Bitext<Token> const& bt, PhrasePair<Token>& pp,
+		 vector<float> * dest = NULL) const
+      {
+	vector<float>& out = dest ? *dest : pp.fvals;
+	bitvector check1(pp.len1);
+	bitvector check2(pp.len2);
+
+	for (size_t i = 0; i < pp.aln.size(); i += 2)
+	  {
+	    check1.set(pp.aln[i]);
+	    check2.set(pp.aln.at(i+1)); // at() throws if the second member is missing
+	  }
+
+	// the first count always lands in slot m_index
+	size_t const first = this->m_index;
+	out[first] = pp.len1 - check1.count();
+	if (this->m_num_feats == 1)
+	  out[first]   += pp.len2 - check2.count();
+	else
+	  out[first+1]  = pp.len2 - check2.count();
+      }
+    };
+  } // namespace bitext
+} // namespace Moses
diff --git a/moses/TranslationModel/UG/sim-pe.cc b/moses/TranslationModel/UG/sim-pe.cc
new file mode 100644
index 0000000000..58a70cab42
--- /dev/null
+++ b/moses/TranslationModel/UG/sim-pe.cc
@@ -0,0 +1,83 @@
+#include "mmsapt.h"
+#include "moses/Manager.h"
+#include "moses/TranslationModel/PhraseDictionaryTreeAdaptor.h"
+#include <boost/foreach.hpp>
+#include <boost/format.hpp>
+#include <boost/tokenizer.hpp>
+#include <boost/shared_ptr.hpp>
+#include <algorithm>
+#include <iostream>
+
+using namespace Moses;
+using namespace bitext;
+using namespace std;
+using namespace boost;
+
+vector<FactorType> fo(1,FactorType(0));
+
+// Prints the target words of the hypothesis chain ending in x, space-separated.
+ostream& operator<<(ostream& out, Hypothesis const* x)
+{
+  vector<const Hypothesis*> H;
+  for (const Hypothesis* h = x; h; h = h->GetPrevHypo()) H.push_back(h);
+  bool first = true; // H.size() is never 0 inside the loop, so it can't decide
+  for (; H.size(); H.pop_back())
+    {
+      Phrase const& p = H.back()->GetCurrTargetPhrase();
+      for (size_t pos = 0 ; pos < p.GetSize() ; pos++, first = false)
+	out << (first ? "" : " ") << *p.GetFactor(pos, 0);
+    }
+  return out;
+}
+
+vector<FactorType> ifo;
+size_t lineNumber;
+
+// Decodes source with the globally configured search algorithm and returns
+// the best hypothesis as text (empty if the decoder returns none).
+string
+translate(string const& source)
+{
+  StaticData const& config = StaticData::Instance();
+  Sentence input;
+  istringstream in(source+"\n");
+  input.Read(in,ifo);
+
+  Manager decoder(lineNumber, input, config.GetSearchAlgorithm());
+  decoder.ProcessSentence();
+
+  // uses the operator<< for Hypothesis const* defined above
+  ostringstream out;
+  out << decoder.GetBestHypothesis();
+  return out.str();
+}
+
+// For each stdin triple (source, target, alignment): translate, add, retranslate.
+int main(int argc, char* argv[])
+{
+  Parameter params;
+  if (!params.LoadParam(argc,argv) || !StaticData::LoadDataStatic(&params, argv[0]))
+    exit(1);
+  StaticData const& global = StaticData::Instance();
+  global.SetVerboseLevel(0);
+  ifo = global.GetInputFactorOrder();
+  // checked cast, done once: the first phrase table may be missing or no Mmsapt
+  Mmsapt* pdsa = (PhraseDictionary::GetColl().empty() ? NULL
+		  : dynamic_cast<Mmsapt*>(PhraseDictionary::GetColl()[0]));
+  if (!pdsa) { cerr << "sim-pe: first phrase table is not an Mmsapt" << endl; exit(1); }
+  lineNumber = 0; // TODO: Include sentence request number here?
+  string source, target, alignment;
+  while (getline(cin,source) && getline(cin,target) && getline(cin,alignment))
+    { // a record without target or alignment line is incomplete and not used
+      cout << "[S] " << source << endl;
+      cout << "[H] " << translate(source) << endl;
+      cout << "[T] " << target << endl;
+      pdsa->add(source,target,alignment);
+      cout << "[X] " << translate(source) << endl;
+      cout << endl;
+    }
+  exit(0);
+}
+  
+  
+
diff --git a/moses/TranslationModel/UG/try-align.cc b/moses/TranslationModel/UG/try-align.cc
index 30c87ccab8..483ad2c34a 100644
--- a/moses/TranslationModel/UG/try-align.cc
+++ b/moses/TranslationModel/UG/try-align.cc
@@ -2,32 +2,33 @@
 using namespace std;
 using namespace Moses;
 
+// currently broken
 
 Mmsapt* PT;
 int main(int argc, char* argv[])
 {
-  string base = argv[1];
-  string L1   = argv[2];
-  string L2   = argv[3];
-  ostringstream buf;
-  buf << "Mmsapt name=PT0 output-factor=0 num-features=5 base="
-      << base << " L1=" << L1 << " L2=" << L2;
-  string configline = buf.str();
-  PT = new Mmsapt(configline);
-  PT->Load();
-  float w[] = { 0.0582634, 0.0518865, 0.0229819, 0.00640856,  0.647506 };
-  vector<float> weights(w,w+5);
-  PT->setWeights(weights);
-  // these values are taken from a moses.ini file;
-  // is there a convenient way of accessing them from within mmsapt ???
-  string eline,fline;
-  // TokenIndex V; V.open("crp/trn/mm/de.tdx");
-  while (getline(cin,eline) && getline(cin,fline))
-    {
-      cout << eline << endl;
-      cout << fline << endl;
-      PT->align(eline,fline);
-    }
-  delete PT;
+  // string base = argv[1];
+  // string L1   = argv[2];
+  // string L2   = argv[3];
+  // ostringstream buf;
+  // buf << "Mmsapt name=PT0 output-factor=0 num-features=5 base="
+  //     << base << " L1=" << L1 << " L2=" << L2;
+  // string configline = buf.str();
+  // PT = new Mmsapt(configline);
+  // PT->Load();
+  // float w[] = { 0.0582634, 0.0518865, 0.0229819, 0.00640856,  0.647506 };
+  // vector<float> weights(w,w+5);
+  // PT->setWeights(weights);
+  // // these values are taken from a moses.ini file;
+  // // is there a convenient way of accessing them from within mmsapt ???
+  // string eline,fline;
+  // // TokenIndex V; V.open("crp/trn/mm/de.tdx");
+  // while (getline(cin,eline) && getline(cin,fline))
+  //   {
+  //     cout << eline << endl;
+  //     cout << fline << endl;
+  //     PT->align(eline,fline);
+  //   }
+  // delete PT;
 }
 
diff --git a/moses/TranslationModel/fuzzy-match/FuzzyMatchWrapper.cpp b/moses/TranslationModel/fuzzy-match/FuzzyMatchWrapper.cpp
index 8766743b35..a91c583432 100644
--- a/moses/TranslationModel/fuzzy-match/FuzzyMatchWrapper.cpp
+++ b/moses/TranslationModel/fuzzy-match/FuzzyMatchWrapper.cpp
@@ -345,10 +345,10 @@ string FuzzyMatchWrapper::ExtractTM(WordIndex &wordIndex, long translationId, co
     // find the best matches according to letter sed
     string best_path = "";
     int best_match = -1;
-    int best_letter_cost;
+    unsigned int best_letter_cost;
     if (lsed_flag) {
       best_letter_cost = compute_length( input[sentenceInd] ) * min_match / 100 + 1;
-      for(int si=0; si<best_tm.size(); si++) {
+      for(size_t si=0; si<best_tm.size(); si++) {
         int s = best_tm[si];
         string path;
         unsigned int letter_cost = sed( input[sentenceInd], source[s], path, true );
diff --git a/moses/TypeDef.h b/moses/TypeDef.h
index fb9fd56cb7..7852d130d1 100644
--- a/moses/TypeDef.h
+++ b/moses/TypeDef.h
@@ -59,7 +59,11 @@ const size_t DEFAULT_MAX_HYPOSTACK_SIZE = 200;
 const size_t DEFAULT_MAX_TRANS_OPT_CACHE_SIZE = 10000;
 const size_t DEFAULT_MAX_TRANS_OPT_SIZE	= 5000;
 const size_t DEFAULT_MAX_PART_TRANS_OPT_SIZE = 10000;
-const size_t DEFAULT_MAX_PHRASE_LENGTH = 20;
+#ifdef PT_UG
+  const size_t DEFAULT_MAX_PHRASE_LENGTH = -1;
+#else
+ const size_t DEFAULT_MAX_PHRASE_LENGTH = 20;
+#endif
 const size_t DEFAULT_MAX_CHART_SPAN			= 10;
 const size_t ARRAY_SIZE_INCR					= 10; //amount by which a phrase gets resized when necessary
 const float LOWEST_SCORE							= -100.0f;
diff --git a/moses/Util.h b/moses/Util.h
index 3bba713322..24a4e2c28c 100644
--- a/moses/Util.h
+++ b/moses/Util.h
@@ -56,8 +56,12 @@ namespace Moses
 
 /** verbose macros
  * */
+
 #define VERBOSE(level,str) { if (StaticData::Instance().GetVerboseLevel() >= level) { TRACE_ERR(str); } }
 #define IFVERBOSE(level) if (StaticData::Instance().GetVerboseLevel() >= level)
+#define XVERBOSE(level,str) { if (StaticData::Instance().GetVerboseLevel() >= level) { TRACE_ERR("[" << __FILE__ << ":" << __LINE__ << "] ");TRACE_ERR(str); } }
+#define HERE __FILE__ << ":" << __LINE__
+
 
 #if __GNUC__ == 4 && __GNUC_MINOR__ == 8 && (__GNUC_PATCHLEVEL__ == 1 || __GNUC_PATCHLEVEL__ == 2)
 // gcc nth_element() bug
diff --git a/scripts/server/moses.py b/scripts/server/moses.py
index 155458b9b4..a176c473ab 100644
--- a/scripts/server/moses.py
+++ b/scripts/server/moses.py
@@ -152,7 +152,7 @@ def find_free_port(p):
 
 class MosesServer(ProcessWrapper):
 
-  def __init__(self,args=["-fd", "\n"]):
+  def __init__(self,args=[]):
     self.process = None
     mserver_cmd  = moses_root+"/bin/mosesserver"
     self.cmd = [mserver_cmd] + args 
@@ -175,7 +175,10 @@ def start(self,config=None,args=[],port=7447,debug=False):
     self.cmd.extend(["--server-port", "%d"%self.port])
     if debug:
       print >>sys.stderr,self.cmd
-      self.process = Popen(self.cmd,stderr = sys.stderr)
+      # self.stderr = open("mserver.%d.stderr"%self.port,'w')
+      # self.stdout = open("mserver.%d.stdout"%self.port,'w')
+      # self.process = Popen(self.cmd,stderr = self.stderr,stdout = self.stdout)
+      self.process = Popen(self.cmd)
     else:
       devnull = open(os.devnull,"w")
       self.process = Popen(self.cmd, stderr=devnull, stdout=devnull)
@@ -216,10 +219,13 @@ def translate(self,input):
 
         elif type(input) is list:
           return [self.translate(x) for x in input]
+
         elif type(input) is dict:
           return self.proxy.translate(input)
+
         else:
           raise Exception("Can't handle input of this type!")
+
       except:
         attempts += 1
         print >>sys.stderr, "WAITING", attempts
diff --git a/scripts/server/sim-pe.py b/scripts/server/sim-pe.py
index 340695a568..52d1e314a9 100755
--- a/scripts/server/sim-pe.py
+++ b/scripts/server/sim-pe.py
@@ -127,13 +127,40 @@ def translate(proxy, args, line):
         param['nbest-distinct'] = True
         pass
     attempts = 0
-    while attempts < 120:
+    while attempts < 20:
+        t1 = time.time()
         try:
-            return proxy.translate(param)
-        except:
-            print >>sys.stderr, "Waiting", proxy
-            attempts += 1
+            return proxy.translate(param) 
+
+        # except xmlrpclib.Fault as e:
+        # except xmlrpclib.ProtocolError as e:
+        # except xmlrpclib.ResponseError as e:
+        except xmlrpclib.Error as e:
+            time.sleep(2) # give all the stderr stuff a chance to be flushed
+            print >>sys.stderr," XMLRPC error:",e
+            print >>sys.stderr, "Input was"
+            print >>sys.stderr, param
+            sys.exit(1)
+
+        except IOError as e:
+            print >>sys.stderr,"I/O error({0}): {1}".format(e.errno, e.strerror)
             time.sleep(5)
+
+        except:
+            serverstatus = mserver.process.poll()
+            if serverstatus == None:
+                print >>sys.stderr, "Connection failed after %f seconds"%(time.time()-t1)
+                attempts += 1
+                if attempts > 10:
+                    time.sleep(10)
+                else:
+                    time.sleep(5)
+                    pass
+            else:
+                
+                print >>sys.stderr, "Oopsidaisy, server exited with code %d (signal %d)"\
+                    %(serverstatus/256,serverstatus%256)
+                pass
             pass
         pass
     raise Exception("Exception: could not reach translation server.")
@@ -210,17 +237,25 @@ def repack_result(idx,result):
             pass
         pass
 
-    if args.url:
-        mserver.connect(args.url)
-    else:
-        mserver.start(args=mo_args,port=args.port,debug=args.debug)
-        pass
-
     ref = None
     aln = None
     if args.ref: ref = read_data(args.ref)
     if args.aln: aln = read_data(args.aln)
 
+    if ref and aln:
+        try:
+            mo_args.index("--serial")
+        except:
+            mo_args.append("--serial")
+            pass
+        pass
+
+    if args.url:
+        mserver.connect(args.url)
+    else:
+        mserver.start(args=mo_args, port=args.port, debug=args.debug)
+        pass
+
     if (args.input == "-"):
         line = sys.stdin.readline()
         idx = 0

From ef33496ea5b82cff3c07da5ed91a26e4776eb52d Mon Sep 17 00:00:00 2001
From: Hieu Hoang <hieuhoang@gmail.com>
Date: Wed, 9 Jul 2014 15:47:34 +0100
Subject: [PATCH 35/84] non-term must have only 1 factor.

---
 OnDiskPt/Word.cpp | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/OnDiskPt/Word.cpp b/OnDiskPt/Word.cpp
index 23d29cc7a7..33bdb6cc5e 100644
--- a/OnDiskPt/Word.cpp
+++ b/OnDiskPt/Word.cpp
@@ -104,14 +104,20 @@ void Word::ConvertToMoses(
   Moses::FactorCollection &factorColl = Moses::FactorCollection::Instance();
   overwrite = Moses::Word(m_isNonTerminal);
 
-  // TODO: this conversion should have been done at load time.
-  util::TokenIter<util::SingleCharacter> tok(vocab.GetString(m_vocabId), '|');
-
-  for (std::vector<Moses::FactorType>::const_iterator t = outputFactorsVec.begin(); t != outputFactorsVec.end(); ++t, ++tok) {
-    UTIL_THROW_IF2(!tok, "Too few factors in \"" << vocab.GetString(m_vocabId) << "\"; was expecting " << outputFactorsVec.size());
-    overwrite.SetFactor(*t, factorColl.AddFactor(*tok, m_isNonTerminal));
+  if (m_isNonTerminal) {
+	    const std::string &tok = vocab.GetString(m_vocabId);
+		overwrite.SetFactor(0, factorColl.AddFactor(tok, m_isNonTerminal));
+  }
+  else {
+	  // TODO: this conversion should have been done at load time.
+	  util::TokenIter<util::SingleCharacter> tok(vocab.GetString(m_vocabId), '|');
+
+	  for (std::vector<Moses::FactorType>::const_iterator t = outputFactorsVec.begin(); t != outputFactorsVec.end(); ++t, ++tok) {
+		UTIL_THROW_IF2(!tok, "Too few factors in \"" << vocab.GetString(m_vocabId) << "\"; was expecting " << outputFactorsVec.size());
+		overwrite.SetFactor(*t, factorColl.AddFactor(*tok, m_isNonTerminal));
+	  }
+	  UTIL_THROW_IF2(tok, "Too many factors in \"" << vocab.GetString(m_vocabId) << "\"; was expecting " << outputFactorsVec.size());
   }
-  UTIL_THROW_IF2(tok, "Too many factors in \"" << vocab.GetString(m_vocabId) << "\"; was expecting " << outputFactorsVec.size());
 }
 
 int Word::Compare(const Word &compare) const

From c9bd98fb0f7f794a6d352620da854e2d42b48328 Mon Sep 17 00:00:00 2001
From: Hieu Hoang <hieuhoang@gmail.com>
Date: Wed, 9 Jul 2014 22:35:59 +0100
Subject: [PATCH 36/84] rename Evaluate() to EvaluateInIsolation()

---
 moses/FF/BleuScoreFeature.h                    | 2 +-
 moses/FF/ConstrainedDecoding.h                 | 2 +-
 moses/FF/ControlRecombination.h                | 2 +-
 moses/FF/CountNonTerms.cpp                     | 2 +-
 moses/FF/CountNonTerms.h                       | 2 +-
 moses/FF/CoveredReferenceFeature.cpp           | 2 +-
 moses/FF/CoveredReferenceFeature.h             | 2 +-
 moses/FF/DecodeFeature.h                       | 2 +-
 moses/FF/DistortionScoreProducer.h             | 2 +-
 moses/FF/ExternalFeature.h                     | 2 +-
 moses/FF/FeatureFunction.h                     | 2 +-
 moses/FF/GlobalLexicalModel.h                  | 2 +-
 moses/FF/GlobalLexicalModelUnlimited.h         | 2 +-
 moses/FF/HyperParameterAsWeight.h              | 2 +-
 moses/FF/InputFeature.h                        | 3 ++-
 moses/FF/InternalStructStatelessFF.cpp         | 2 +-
 moses/FF/InternalStructStatelessFF.h           | 2 +-
 moses/FF/LexicalReordering/LexicalReordering.h | 2 +-
 moses/FF/MaxSpanFreeNonTermSource.cpp          | 2 +-
 moses/FF/MaxSpanFreeNonTermSource.h            | 2 +-
 moses/FF/NieceTerminal.cpp                     | 2 +-
 moses/FF/NieceTerminal.h                       | 2 +-
 moses/FF/OSM-Feature/OpSequenceModel.cpp       | 2 +-
 moses/FF/OSM-Feature/OpSequenceModel.h         | 2 +-
 moses/FF/PhraseBoundaryFeature.h               | 2 +-
 moses/FF/PhraseLengthFeature.cpp               | 2 +-
 moses/FF/PhraseLengthFeature.h                 | 2 +-
 moses/FF/PhrasePairFeature.h                   | 2 +-
 moses/FF/PhrasePenalty.cpp                     | 2 +-
 moses/FF/PhrasePenalty.h                       | 2 +-
 moses/FF/ReferenceComparison.h                 | 2 +-
 moses/FF/RuleScope.cpp                         | 2 +-
 moses/FF/RuleScope.h                           | 2 +-
 moses/FF/SetSourcePhrase.cpp                   | 2 +-
 moses/FF/SetSourcePhrase.h                     | 2 +-
 moses/FF/SkeletonStatefulFF.cpp                | 2 +-
 moses/FF/SkeletonStatefulFF.h                  | 2 +-
 moses/FF/SkeletonStatelessFF.cpp               | 2 +-
 moses/FF/SkeletonStatelessFF.h                 | 2 +-
 moses/FF/SoftMatchingFeature.h                 | 2 +-
 moses/FF/SourceGHKMTreeInputMatchFeature.h     | 2 +-
 moses/FF/SourceWordDeletionFeature.cpp         | 2 +-
 moses/FF/SourceWordDeletionFeature.h           | 2 +-
 moses/FF/SpanLength.cpp                        | 2 +-
 moses/FF/SpanLength.h                          | 2 +-
 moses/FF/SparseHieroReorderingFeature.h        | 2 +-
 moses/FF/SyntaxRHS.cpp                         | 2 +-
 moses/FF/SyntaxRHS.h                           | 2 +-
 moses/FF/TargetBigramFeature.h                 | 2 +-
 moses/FF/TargetNgramFeature.h                  | 2 +-
 moses/FF/TargetWordInsertionFeature.cpp        | 2 +-
 moses/FF/TargetWordInsertionFeature.h          | 2 +-
 moses/FF/TreeStructureFeature.h                | 2 +-
 moses/FF/UnknownWordPenaltyProducer.h          | 2 +-
 moses/FF/WordPenaltyProducer.cpp               | 2 +-
 moses/FF/WordPenaltyProducer.h                 | 2 +-
 moses/FF/WordTranslationFeature.h              | 2 +-
 moses/LM/Base.cpp                              | 2 +-
 moses/LM/Base.h                                | 2 +-
 moses/ScoreComponentCollectionTest.cpp         | 2 +-
 moses/TargetPhrase.cpp                         | 2 +-
 61 files changed, 62 insertions(+), 61 deletions(-)

diff --git a/moses/FF/BleuScoreFeature.h b/moses/FF/BleuScoreFeature.h
index 99f04f5ff6..e966ed56b6 100644
--- a/moses/FF/BleuScoreFeature.h
+++ b/moses/FF/BleuScoreFeature.h
@@ -128,7 +128,7 @@ class BleuScoreFeature : public StatefulFeatureFunction
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection *estimatedFutureScore = NULL) const
   {}
-  void Evaluate(const Phrase &source
+  void EvaluateInIsolation(const Phrase &source
                 , const TargetPhrase &targetPhrase
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/FF/ConstrainedDecoding.h b/moses/FF/ConstrainedDecoding.h
index 2db192ce87..f9c495c6f0 100644
--- a/moses/FF/ConstrainedDecoding.h
+++ b/moses/FF/ConstrainedDecoding.h
@@ -41,7 +41,7 @@ class ConstrainedDecoding : public StatefulFeatureFunction
     return true;
   }
 
-  void Evaluate(const Phrase &source
+  void EvaluateInIsolation(const Phrase &source
                 , const TargetPhrase &targetPhrase
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/FF/ControlRecombination.h b/moses/FF/ControlRecombination.h
index 0100d500d4..8bfc7ce526 100644
--- a/moses/FF/ControlRecombination.h
+++ b/moses/FF/ControlRecombination.h
@@ -57,7 +57,7 @@ class ControlRecombination : public StatefulFeatureFunction
     return true;
   }
 
-  void Evaluate(const Phrase &source
+  void EvaluateInIsolation(const Phrase &source
                 , const TargetPhrase &targetPhrase
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/FF/CountNonTerms.cpp b/moses/FF/CountNonTerms.cpp
index 92b79cd5d2..03c7b73156 100644
--- a/moses/FF/CountNonTerms.cpp
+++ b/moses/FF/CountNonTerms.cpp
@@ -16,7 +16,7 @@ CountNonTerms::CountNonTerms(const std::string &line)
   ReadParameters();
 }
 
-void CountNonTerms::Evaluate(const Phrase &sourcePhrase
+void CountNonTerms::EvaluateInIsolation(const Phrase &sourcePhrase
               , const TargetPhrase &targetPhrase
               , ScoreComponentCollection &scoreBreakdown
               , ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/FF/CountNonTerms.h b/moses/FF/CountNonTerms.h
index 1fe71745d5..754e88b4a6 100644
--- a/moses/FF/CountNonTerms.h
+++ b/moses/FF/CountNonTerms.h
@@ -12,7 +12,7 @@ class CountNonTerms : public StatelessFeatureFunction
   bool IsUseable(const FactorMask &mask) const
   { return true; }
 
-  void Evaluate(const Phrase &source
+  void EvaluateInIsolation(const Phrase &source
                 , const TargetPhrase &targetPhrase
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection &estimatedFutureScore) const;
diff --git a/moses/FF/CoveredReferenceFeature.cpp b/moses/FF/CoveredReferenceFeature.cpp
index 25ab829f85..a38031d7e8 100644
--- a/moses/FF/CoveredReferenceFeature.cpp
+++ b/moses/FF/CoveredReferenceFeature.cpp
@@ -40,7 +40,7 @@ int CoveredReferenceState::Compare(const FFState& other) const
 //  return (m_coveredRef.size() < otherState.m_coveredRef.size()) ? -1 : +1;
 }
 
-void CoveredReferenceFeature::Evaluate(const Phrase &source
+void CoveredReferenceFeature::EvaluateInIsolation(const Phrase &source
                                   , const TargetPhrase &targetPhrase
                                   , ScoreComponentCollection &scoreBreakdown
                                   , ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/FF/CoveredReferenceFeature.h b/moses/FF/CoveredReferenceFeature.h
index cd2b2f9660..510490e76c 100644
--- a/moses/FF/CoveredReferenceFeature.h
+++ b/moses/FF/CoveredReferenceFeature.h
@@ -52,7 +52,7 @@ class CoveredReferenceFeature : public StatefulFeatureFunction
     return new CoveredReferenceState();
   }
 
-  void Evaluate(const Phrase &source
+  void EvaluateInIsolation(const Phrase &source
                 , const TargetPhrase &targetPhrase
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection &estimatedFutureScore) const;
diff --git a/moses/FF/DecodeFeature.h b/moses/FF/DecodeFeature.h
index d795983284..393f9c87a7 100644
--- a/moses/FF/DecodeFeature.h
+++ b/moses/FF/DecodeFeature.h
@@ -75,7 +75,7 @@ class DecodeFeature : public StatelessFeatureFunction
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection *estimatedFutureScore = NULL) const
   {}
-  void Evaluate(const Phrase &source
+  void EvaluateInIsolation(const Phrase &source
                 , const TargetPhrase &targetPhrase
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/FF/DistortionScoreProducer.h b/moses/FF/DistortionScoreProducer.h
index 1bc6493e29..0551b9ae8a 100644
--- a/moses/FF/DistortionScoreProducer.h
+++ b/moses/FF/DistortionScoreProducer.h
@@ -47,7 +47,7 @@ class DistortionScoreProducer : public StatefulFeatureFunction
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection *estimatedFutureScore = NULL) const
   {}
-  void Evaluate(const Phrase &source
+  void EvaluateInIsolation(const Phrase &source
                 , const TargetPhrase &targetPhrase
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/FF/ExternalFeature.h b/moses/FF/ExternalFeature.h
index 19eb45f2a1..d2eeb8cd04 100644
--- a/moses/FF/ExternalFeature.h
+++ b/moses/FF/ExternalFeature.h
@@ -51,7 +51,7 @@ class ExternalFeature : public StatefulFeatureFunction
 
   void SetParameter(const std::string& key, const std::string& value);
 
-  void Evaluate(const Phrase &source
+  void EvaluateInIsolation(const Phrase &source
                 , const TargetPhrase &targetPhrase
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/FF/FeatureFunction.h b/moses/FF/FeatureFunction.h
index 18b016c8fd..edfd57c92e 100644
--- a/moses/FF/FeatureFunction.h
+++ b/moses/FF/FeatureFunction.h
@@ -98,7 +98,7 @@ class FeatureFunction
   // source phrase is the substring that the phrase table uses to look up the target phrase,
   // may have more factors than actually need, but not guaranteed.
   // For SCFG decoding, the source contains non-terminals, NOT the raw source from the input sentence
-  virtual void Evaluate(const Phrase &source
+  virtual void EvaluateInIsolation(const Phrase &source
                         , const TargetPhrase &targetPhrase
                         , ScoreComponentCollection &scoreBreakdown
                         , ScoreComponentCollection &estimatedFutureScore) const = 0;
diff --git a/moses/FF/GlobalLexicalModel.h b/moses/FF/GlobalLexicalModel.h
index 664835df52..9418d1b152 100644
--- a/moses/FF/GlobalLexicalModel.h
+++ b/moses/FF/GlobalLexicalModel.h
@@ -87,7 +87,7 @@ class GlobalLexicalModel : public StatelessFeatureFunction
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection *estimatedFutureScore = NULL) const
   {}
-  void Evaluate(const Phrase &source
+  void EvaluateInIsolation(const Phrase &source
                 , const TargetPhrase &targetPhrase
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/FF/GlobalLexicalModelUnlimited.h b/moses/FF/GlobalLexicalModelUnlimited.h
index f12df7d61f..167b80238f 100644
--- a/moses/FF/GlobalLexicalModelUnlimited.h
+++ b/moses/FF/GlobalLexicalModelUnlimited.h
@@ -97,7 +97,7 @@ class GlobalLexicalModelUnlimited : public StatelessFeatureFunction
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection *estimatedFutureScore = NULL) const
   {}
-  void Evaluate(const Phrase &source
+  void EvaluateInIsolation(const Phrase &source
                 , const TargetPhrase &targetPhrase
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/FF/HyperParameterAsWeight.h b/moses/FF/HyperParameterAsWeight.h
index 9db375c0f7..5c3189b20e 100644
--- a/moses/FF/HyperParameterAsWeight.h
+++ b/moses/FF/HyperParameterAsWeight.h
@@ -17,7 +17,7 @@ class HyperParameterAsWeight : public StatelessFeatureFunction
   virtual bool IsUseable(const FactorMask &mask) const
   { return true; }
 
-  virtual void Evaluate(const Phrase &source
+  virtual void EvaluateInIsolation(const Phrase &source
                         , const TargetPhrase &targetPhrase
                         , ScoreComponentCollection &scoreBreakdown
                         , ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/FF/InputFeature.h b/moses/FF/InputFeature.h
index e4b1a8d991..c8ad61ffe3 100644
--- a/moses/FF/InputFeature.h
+++ b/moses/FF/InputFeature.h
@@ -41,11 +41,12 @@ class InputFeature : public StatelessFeatureFunction
     return m_numRealWordCount;
   }
 
-  void Evaluate(const Phrase &source
+  void EvaluateInIsolation(const Phrase &source
                 , const TargetPhrase &targetPhrase
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection &estimatedFutureScore) const
   {}
+
   void Evaluate(const InputType &input
                 , const InputPath &inputPath
                 , const TargetPhrase &targetPhrase
diff --git a/moses/FF/InternalStructStatelessFF.cpp b/moses/FF/InternalStructStatelessFF.cpp
index 06014a1cfe..05b94b87a6 100644
--- a/moses/FF/InternalStructStatelessFF.cpp
+++ b/moses/FF/InternalStructStatelessFF.cpp
@@ -5,7 +5,7 @@ using namespace std;
 
 namespace Moses
 {
-void InternalStructStatelessFF::Evaluate(const Phrase &source
+void InternalStructStatelessFF::EvaluateInIsolation(const Phrase &source
                         , const TargetPhrase &targetPhrase
                         , ScoreComponentCollection &scoreBreakdown
                         , ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/FF/InternalStructStatelessFF.h b/moses/FF/InternalStructStatelessFF.h
index a0ea3f7124..d7a9a0961f 100644
--- a/moses/FF/InternalStructStatelessFF.h
+++ b/moses/FF/InternalStructStatelessFF.h
@@ -16,7 +16,7 @@ class InternalStructStatelessFF : public StatelessFeatureFunction
 	bool IsUseable(const FactorMask &mask) const
 	{ return true; }
 
-	void Evaluate(const Phrase &source
+	void EvaluateInIsolation(const Phrase &source
 	                        , const TargetPhrase &targetPhrase
 	                        , ScoreComponentCollection &scoreBreakdown
 	                        , ScoreComponentCollection &estimatedFutureScore) const;
diff --git a/moses/FF/LexicalReordering/LexicalReordering.h b/moses/FF/LexicalReordering/LexicalReordering.h
index 6255987a4f..66f202126e 100644
--- a/moses/FF/LexicalReordering/LexicalReordering.h
+++ b/moses/FF/LexicalReordering/LexicalReordering.h
@@ -61,7 +61,7 @@ class LexicalReordering : public StatefulFeatureFunction
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection *estimatedFutureScore = NULL) const
   {}
-  void Evaluate(const Phrase &source
+  void EvaluateInIsolation(const Phrase &source
                 , const TargetPhrase &targetPhrase
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/FF/MaxSpanFreeNonTermSource.cpp b/moses/FF/MaxSpanFreeNonTermSource.cpp
index 3951fdd270..7165e96f5b 100644
--- a/moses/FF/MaxSpanFreeNonTermSource.cpp
+++ b/moses/FF/MaxSpanFreeNonTermSource.cpp
@@ -27,7 +27,7 @@ MaxSpanFreeNonTermSource::MaxSpanFreeNonTermSource(const std::string &line)
   m_glueTargetLHS.SetFactor(0, factor);
 }
 
-void MaxSpanFreeNonTermSource::Evaluate(const Phrase &source
+void MaxSpanFreeNonTermSource::EvaluateInIsolation(const Phrase &source
 						, const TargetPhrase &targetPhrase
 						, ScoreComponentCollection &scoreBreakdown
 						, ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/FF/MaxSpanFreeNonTermSource.h b/moses/FF/MaxSpanFreeNonTermSource.h
index a9eec7b5e9..30f1df02cc 100644
--- a/moses/FF/MaxSpanFreeNonTermSource.h
+++ b/moses/FF/MaxSpanFreeNonTermSource.h
@@ -15,7 +15,7 @@ class MaxSpanFreeNonTermSource : public StatelessFeatureFunction
 	  virtual bool IsUseable(const FactorMask &mask) const
 	  { return true; }
 
-	  virtual void Evaluate(const Phrase &source
+	  virtual void EvaluateInIsolation(const Phrase &source
 							, const TargetPhrase &targetPhrase
 							, ScoreComponentCollection &scoreBreakdown
 							, ScoreComponentCollection &estimatedFutureScore) const;
diff --git a/moses/FF/NieceTerminal.cpp b/moses/FF/NieceTerminal.cpp
index 88c9f86cd4..6299d9e08c 100644
--- a/moses/FF/NieceTerminal.cpp
+++ b/moses/FF/NieceTerminal.cpp
@@ -25,7 +25,7 @@ std::vector<float> NieceTerminal::DefaultWeights() const
   return ret;
 }
 
-void NieceTerminal::Evaluate(const Phrase &source
+void NieceTerminal::EvaluateInIsolation(const Phrase &source
                                    , const TargetPhrase &targetPhrase
                                    , ScoreComponentCollection &scoreBreakdown
                                    , ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/FF/NieceTerminal.h b/moses/FF/NieceTerminal.h
index b7b398fff4..efa471c001 100644
--- a/moses/FF/NieceTerminal.h
+++ b/moses/FF/NieceTerminal.h
@@ -19,7 +19,7 @@ class NieceTerminal : public StatelessFeatureFunction
     return true;
   }
 
-  void Evaluate(const Phrase &source
+  void EvaluateInIsolation(const Phrase &source
                 , const TargetPhrase &targetPhrase
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection &estimatedFutureScore) const;
diff --git a/moses/FF/OSM-Feature/OpSequenceModel.cpp b/moses/FF/OSM-Feature/OpSequenceModel.cpp
index dfa380a774..ba5405729c 100644
--- a/moses/FF/OSM-Feature/OpSequenceModel.cpp
+++ b/moses/FF/OSM-Feature/OpSequenceModel.cpp
@@ -42,7 +42,7 @@ void OpSequenceModel::Load()
 
 
 
-void OpSequenceModel:: Evaluate(const Phrase &source
+void OpSequenceModel:: EvaluateInIsolation(const Phrase &source
                                 , const TargetPhrase &targetPhrase
                                 , ScoreComponentCollection &scoreBreakdown
                                 , ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/FF/OSM-Feature/OpSequenceModel.h b/moses/FF/OSM-Feature/OpSequenceModel.h
index 64cab3044e..0a670cc42e 100644
--- a/moses/FF/OSM-Feature/OpSequenceModel.h
+++ b/moses/FF/OSM-Feature/OpSequenceModel.h
@@ -46,7 +46,7 @@ class OpSequenceModel : public StatefulFeatureFunction
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection *estimatedFutureScore = NULL) const
   {}
-  void  Evaluate(const Phrase &source
+  void  EvaluateInIsolation(const Phrase &source
                  , const TargetPhrase &targetPhrase
                  , ScoreComponentCollection &scoreBreakdown
                  , ScoreComponentCollection &estimatedFutureScore) const;
diff --git a/moses/FF/PhraseBoundaryFeature.h b/moses/FF/PhraseBoundaryFeature.h
index fbafc6da99..56ccda7afe 100644
--- a/moses/FF/PhraseBoundaryFeature.h
+++ b/moses/FF/PhraseBoundaryFeature.h
@@ -60,7 +60,7 @@ class PhraseBoundaryFeature : public StatefulFeatureFunction
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection *estimatedFutureScore = NULL) const
   {}
-  void Evaluate(const Phrase &source
+  void EvaluateInIsolation(const Phrase &source
                 , const TargetPhrase &targetPhrase
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/FF/PhraseLengthFeature.cpp b/moses/FF/PhraseLengthFeature.cpp
index 43e0d1b2de..7850c374a6 100644
--- a/moses/FF/PhraseLengthFeature.cpp
+++ b/moses/FF/PhraseLengthFeature.cpp
@@ -15,7 +15,7 @@ PhraseLengthFeature::PhraseLengthFeature(const std::string &line)
   ReadParameters();
 }
 
-void PhraseLengthFeature::Evaluate(const Phrase &source
+void PhraseLengthFeature::EvaluateInIsolation(const Phrase &source
                                    , const TargetPhrase &targetPhrase
                                    , ScoreComponentCollection &scoreBreakdown
                                    , ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/FF/PhraseLengthFeature.h b/moses/FF/PhraseLengthFeature.h
index ba835f6545..95640b12f2 100644
--- a/moses/FF/PhraseLengthFeature.h
+++ b/moses/FF/PhraseLengthFeature.h
@@ -41,7 +41,7 @@ class PhraseLengthFeature : public StatelessFeatureFunction
                 , ScoreComponentCollection *estimatedFutureScore = NULL) const
   {}
 
-  virtual void Evaluate(const Phrase &source
+  virtual void EvaluateInIsolation(const Phrase &source
                         , const TargetPhrase &targetPhrase
                         , ScoreComponentCollection &scoreBreakdown
                         , ScoreComponentCollection &estimatedFutureScore) const;
diff --git a/moses/FF/PhrasePairFeature.h b/moses/FF/PhrasePairFeature.h
index 7790e9035d..ce4822f2f8 100644
--- a/moses/FF/PhrasePairFeature.h
+++ b/moses/FF/PhrasePairFeature.h
@@ -52,7 +52,7 @@ class PhrasePairFeature: public StatelessFeatureFunction
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection *estimatedFutureScore = NULL) const
   {}
-  void Evaluate(const Phrase &source
+  void EvaluateInIsolation(const Phrase &source
                 , const TargetPhrase &targetPhrase
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/FF/PhrasePenalty.cpp b/moses/FF/PhrasePenalty.cpp
index b3e493707a..ddd21e491a 100644
--- a/moses/FF/PhrasePenalty.cpp
+++ b/moses/FF/PhrasePenalty.cpp
@@ -10,7 +10,7 @@ PhrasePenalty::PhrasePenalty(const std::string &line)
   ReadParameters();
 }
 
-void PhrasePenalty::Evaluate(const Phrase &source
+void PhrasePenalty::EvaluateInIsolation(const Phrase &source
                              , const TargetPhrase &targetPhrase
                              , ScoreComponentCollection &scoreBreakdown
                              , ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/FF/PhrasePenalty.h b/moses/FF/PhrasePenalty.h
index a4014abf1f..09e82db055 100644
--- a/moses/FF/PhrasePenalty.h
+++ b/moses/FF/PhrasePenalty.h
@@ -14,7 +14,7 @@ class PhrasePenalty : public StatelessFeatureFunction
     return true;
   }
 
-  virtual void Evaluate(const Phrase &source
+  virtual void EvaluateInIsolation(const Phrase &source
                         , const TargetPhrase &targetPhrase
                         , ScoreComponentCollection &scoreBreakdown
                         , ScoreComponentCollection &estimatedFutureScore) const;
diff --git a/moses/FF/ReferenceComparison.h b/moses/FF/ReferenceComparison.h
index 8b0341fd61..417d38ec44 100644
--- a/moses/FF/ReferenceComparison.h
+++ b/moses/FF/ReferenceComparison.h
@@ -15,7 +15,7 @@ class ReferenceComparison : public StatelessFeatureFunction
 	  virtual bool IsUseable(const FactorMask &mask) const
 	  { return true; }
 
-	  virtual void Evaluate(const Phrase &source
+	  virtual void EvaluateInIsolation(const Phrase &source
 							, const TargetPhrase &targetPhrase
 							, ScoreComponentCollection &scoreBreakdown
 							, ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/FF/RuleScope.cpp b/moses/FF/RuleScope.cpp
index e949c33377..ed329c4ca8 100644
--- a/moses/FF/RuleScope.cpp
+++ b/moses/FF/RuleScope.cpp
@@ -16,7 +16,7 @@ bool IsAmbiguous(const Word &word, bool sourceSyntax)
   return word.IsNonTerminal() && (!sourceSyntax || word == inputDefaultNonTerminal);
 }
 
-void RuleScope::Evaluate(const Phrase &source
+void RuleScope::EvaluateInIsolation(const Phrase &source
 						, const TargetPhrase &targetPhrase
 						, ScoreComponentCollection &scoreBreakdown
 						, ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/FF/RuleScope.h b/moses/FF/RuleScope.h
index 4ac10c804b..53334e789f 100644
--- a/moses/FF/RuleScope.h
+++ b/moses/FF/RuleScope.h
@@ -14,7 +14,7 @@ class RuleScope : public StatelessFeatureFunction
 	  virtual bool IsUseable(const FactorMask &mask) const
 	  { return true; }
 
-	  virtual void Evaluate(const Phrase &source
+	  virtual void EvaluateInIsolation(const Phrase &source
 							, const TargetPhrase &targetPhrase
 							, ScoreComponentCollection &scoreBreakdown
 							, ScoreComponentCollection &estimatedFutureScore) const;
diff --git a/moses/FF/SetSourcePhrase.cpp b/moses/FF/SetSourcePhrase.cpp
index 0a2eaa4cb9..f89683f28b 100644
--- a/moses/FF/SetSourcePhrase.cpp
+++ b/moses/FF/SetSourcePhrase.cpp
@@ -10,7 +10,7 @@ SetSourcePhrase::SetSourcePhrase(const std::string &line)
   ReadParameters();
 }
 
-void SetSourcePhrase::Evaluate(const Phrase &source
+void SetSourcePhrase::EvaluateInIsolation(const Phrase &source
 						, const TargetPhrase &targetPhrase
 						, ScoreComponentCollection &scoreBreakdown
 						, ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/FF/SetSourcePhrase.h b/moses/FF/SetSourcePhrase.h
index 0d7ad2adea..dfc480f9e3 100644
--- a/moses/FF/SetSourcePhrase.h
+++ b/moses/FF/SetSourcePhrase.h
@@ -14,7 +14,7 @@ class SetSourcePhrase : public StatelessFeatureFunction
   virtual bool IsUseable(const FactorMask &mask) const
   { return true; }
 
-  virtual void Evaluate(const Phrase &source
+  virtual void EvaluateInIsolation(const Phrase &source
 						, const TargetPhrase &targetPhrase
 						, ScoreComponentCollection &scoreBreakdown
 						, ScoreComponentCollection &estimatedFutureScore) const;
diff --git a/moses/FF/SkeletonStatefulFF.cpp b/moses/FF/SkeletonStatefulFF.cpp
index 2dfec5fad6..85df270e2c 100644
--- a/moses/FF/SkeletonStatefulFF.cpp
+++ b/moses/FF/SkeletonStatefulFF.cpp
@@ -23,7 +23,7 @@ SkeletonStatefulFF::SkeletonStatefulFF(const std::string &line)
   ReadParameters();
 }
 
-void SkeletonStatefulFF::Evaluate(const Phrase &source
+void SkeletonStatefulFF::EvaluateInIsolation(const Phrase &source
                                   , const TargetPhrase &targetPhrase
                                   , ScoreComponentCollection &scoreBreakdown
                                   , ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/FF/SkeletonStatefulFF.h b/moses/FF/SkeletonStatefulFF.h
index 9cbe6b512c..448f1ed0e8 100644
--- a/moses/FF/SkeletonStatefulFF.h
+++ b/moses/FF/SkeletonStatefulFF.h
@@ -30,7 +30,7 @@ class SkeletonStatefulFF : public StatefulFeatureFunction
     return new SkeletonState(0);
   }
 
-  void Evaluate(const Phrase &source
+  void EvaluateInIsolation(const Phrase &source
                 , const TargetPhrase &targetPhrase
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection &estimatedFutureScore) const;
diff --git a/moses/FF/SkeletonStatelessFF.cpp b/moses/FF/SkeletonStatelessFF.cpp
index c05e27dec8..e032063b3c 100644
--- a/moses/FF/SkeletonStatelessFF.cpp
+++ b/moses/FF/SkeletonStatelessFF.cpp
@@ -13,7 +13,7 @@ SkeletonStatelessFF::SkeletonStatelessFF(const std::string &line)
   ReadParameters();
 }
 
-void SkeletonStatelessFF::Evaluate(const Phrase &source
+void SkeletonStatelessFF::EvaluateInIsolation(const Phrase &source
                                    , const TargetPhrase &targetPhrase
                                    , ScoreComponentCollection &scoreBreakdown
                                    , ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/FF/SkeletonStatelessFF.h b/moses/FF/SkeletonStatelessFF.h
index 5adb35f6dc..9e9b4bdfde 100644
--- a/moses/FF/SkeletonStatelessFF.h
+++ b/moses/FF/SkeletonStatelessFF.h
@@ -15,7 +15,7 @@ class SkeletonStatelessFF : public StatelessFeatureFunction
     return true;
   }
 
-  void Evaluate(const Phrase &source
+  void EvaluateInIsolation(const Phrase &source
                 , const TargetPhrase &targetPhrase
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection &estimatedFutureScore) const;
diff --git a/moses/FF/SoftMatchingFeature.h b/moses/FF/SoftMatchingFeature.h
index b823c2426e..542c9d459d 100644
--- a/moses/FF/SoftMatchingFeature.h
+++ b/moses/FF/SoftMatchingFeature.h
@@ -22,7 +22,7 @@ class SoftMatchingFeature : public StatelessFeatureFunction
   virtual void EvaluateChart(const ChartHypothesis& hypo,
                              ScoreComponentCollection* accumulator) const;
 
-  void Evaluate(const Phrase &source
+  void EvaluateInIsolation(const Phrase &source
                 , const TargetPhrase &targetPhrase
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection &estimatedFutureScore) const {};
diff --git a/moses/FF/SourceGHKMTreeInputMatchFeature.h b/moses/FF/SourceGHKMTreeInputMatchFeature.h
index a1ddae3259..b910d54b63 100644
--- a/moses/FF/SourceGHKMTreeInputMatchFeature.h
+++ b/moses/FF/SourceGHKMTreeInputMatchFeature.h
@@ -17,7 +17,7 @@ class SourceGHKMTreeInputMatchFeature : public StatelessFeatureFunction
 
   void SetParameter(const std::string& key, const std::string& value);
 
-  void Evaluate(const Phrase &source
+  void EvaluateInIsolation(const Phrase &source
                 , const TargetPhrase &targetPhrase
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection &estimatedFutureScore) const {};
diff --git a/moses/FF/SourceWordDeletionFeature.cpp b/moses/FF/SourceWordDeletionFeature.cpp
index 101e405799..e5167b93be 100644
--- a/moses/FF/SourceWordDeletionFeature.cpp
+++ b/moses/FF/SourceWordDeletionFeature.cpp
@@ -63,7 +63,7 @@ bool SourceWordDeletionFeature::IsUseable(const FactorMask &mask) const
   return ret;
 }
 
-void SourceWordDeletionFeature::Evaluate(const Phrase &source
+void SourceWordDeletionFeature::EvaluateInIsolation(const Phrase &source
     , const TargetPhrase &targetPhrase
     , ScoreComponentCollection &scoreBreakdown
     , ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/FF/SourceWordDeletionFeature.h b/moses/FF/SourceWordDeletionFeature.h
index 9b04476af1..bd1ddb2398 100644
--- a/moses/FF/SourceWordDeletionFeature.h
+++ b/moses/FF/SourceWordDeletionFeature.h
@@ -28,7 +28,7 @@ class SourceWordDeletionFeature : public StatelessFeatureFunction
 
   bool IsUseable(const FactorMask &mask) const;
 
-  void Evaluate(const Phrase &source
+  void EvaluateInIsolation(const Phrase &source
                 , const TargetPhrase &targetPhrase
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection &estimatedFutureScore) const;
diff --git a/moses/FF/SpanLength.cpp b/moses/FF/SpanLength.cpp
index 6192334be7..966aa0b944 100644
--- a/moses/FF/SpanLength.cpp
+++ b/moses/FF/SpanLength.cpp
@@ -21,7 +21,7 @@ SpanLength::SpanLength(const std::string &line)
   ReadParameters();
 }
 
-void SpanLength::Evaluate(const Phrase &source
+void SpanLength::EvaluateInIsolation(const Phrase &source
 						, const TargetPhrase &targetPhrase
 						, ScoreComponentCollection &scoreBreakdown
 						, ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/FF/SpanLength.h b/moses/FF/SpanLength.h
index ba2196f871..caa6878b8c 100644
--- a/moses/FF/SpanLength.h
+++ b/moses/FF/SpanLength.h
@@ -14,7 +14,7 @@ class SpanLength : public StatelessFeatureFunction
 	  virtual bool IsUseable(const FactorMask &mask) const
 	  { return true; }
 
-	  virtual void Evaluate(const Phrase &source
+	  virtual void EvaluateInIsolation(const Phrase &source
 							, const TargetPhrase &targetPhrase
 							, ScoreComponentCollection &scoreBreakdown
 							, ScoreComponentCollection &estimatedFutureScore) const;
diff --git a/moses/FF/SparseHieroReorderingFeature.h b/moses/FF/SparseHieroReorderingFeature.h
index 5d0f5830c4..82b9890741 100644
--- a/moses/FF/SparseHieroReorderingFeature.h
+++ b/moses/FF/SparseHieroReorderingFeature.h
@@ -31,7 +31,7 @@ class SparseHieroReorderingFeature : public StatelessFeatureFunction
 
   void SetParameter(const std::string& key, const std::string& value);
 
-	void Evaluate(const Phrase &source
+	void EvaluateInIsolation(const Phrase &source
 	                        , const TargetPhrase &targetPhrase
 	                        , ScoreComponentCollection &scoreBreakdown
 	                        , ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/FF/SyntaxRHS.cpp b/moses/FF/SyntaxRHS.cpp
index 292eac0048..abcff2c3be 100644
--- a/moses/FF/SyntaxRHS.cpp
+++ b/moses/FF/SyntaxRHS.cpp
@@ -14,7 +14,7 @@ SyntaxRHS::SyntaxRHS(const std::string &line)
   ReadParameters();
 }
 
-void SyntaxRHS::Evaluate(const Phrase &source
+void SyntaxRHS::EvaluateInIsolation(const Phrase &source
                                    , const TargetPhrase &targetPhrase
                                    , ScoreComponentCollection &scoreBreakdown
                                    , ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/FF/SyntaxRHS.h b/moses/FF/SyntaxRHS.h
index 1f9adcb938..9a59597ba1 100644
--- a/moses/FF/SyntaxRHS.h
+++ b/moses/FF/SyntaxRHS.h
@@ -15,7 +15,7 @@ class SyntaxRHS : public StatelessFeatureFunction
     return true;
   }
 
-  void Evaluate(const Phrase &source
+  void EvaluateInIsolation(const Phrase &source
                 , const TargetPhrase &targetPhrase
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection &estimatedFutureScore) const;
diff --git a/moses/FF/TargetBigramFeature.h b/moses/FF/TargetBigramFeature.h
index fe2500ad27..8c600ab3ab 100644
--- a/moses/FF/TargetBigramFeature.h
+++ b/moses/FF/TargetBigramFeature.h
@@ -54,7 +54,7 @@ class TargetBigramFeature : public StatefulFeatureFunction
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection *estimatedFutureScore = NULL) const
   {}
-  void Evaluate(const Phrase &source
+  void EvaluateInIsolation(const Phrase &source
                 , const TargetPhrase &targetPhrase
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/FF/TargetNgramFeature.h b/moses/FF/TargetNgramFeature.h
index 8e91a08b29..7ea236d9d8 100644
--- a/moses/FF/TargetNgramFeature.h
+++ b/moses/FF/TargetNgramFeature.h
@@ -199,7 +199,7 @@ class TargetNgramFeature : public StatefulFeatureFunction
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection *estimatedFutureScore = NULL) const
   {}
-  void Evaluate(const Phrase &source
+  void EvaluateInIsolation(const Phrase &source
                 , const TargetPhrase &targetPhrase
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/FF/TargetWordInsertionFeature.cpp b/moses/FF/TargetWordInsertionFeature.cpp
index 7bb1ae6e9e..c8db6bfe38 100644
--- a/moses/FF/TargetWordInsertionFeature.cpp
+++ b/moses/FF/TargetWordInsertionFeature.cpp
@@ -53,7 +53,7 @@ void TargetWordInsertionFeature::Load()
   m_unrestricted = false;
 }
 
-void TargetWordInsertionFeature::Evaluate(const Phrase &source
+void TargetWordInsertionFeature::EvaluateInIsolation(const Phrase &source
     , const TargetPhrase &targetPhrase
     , ScoreComponentCollection &scoreBreakdown
     , ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/FF/TargetWordInsertionFeature.h b/moses/FF/TargetWordInsertionFeature.h
index eedde61b2f..58ea10a4bd 100644
--- a/moses/FF/TargetWordInsertionFeature.h
+++ b/moses/FF/TargetWordInsertionFeature.h
@@ -28,7 +28,7 @@ class TargetWordInsertionFeature : public StatelessFeatureFunction
 
   void Load();
 
-  virtual void Evaluate(const Phrase &source
+  virtual void EvaluateInIsolation(const Phrase &source
                         , const TargetPhrase &targetPhrase
                         , ScoreComponentCollection &scoreBreakdown
                         , ScoreComponentCollection &estimatedFutureScore) const;
diff --git a/moses/FF/TreeStructureFeature.h b/moses/FF/TreeStructureFeature.h
index 0fbf0f9ea5..f422c4a878 100644
--- a/moses/FF/TreeStructureFeature.h
+++ b/moses/FF/TreeStructureFeature.h
@@ -152,7 +152,7 @@ class TreeStructureFeature : public StatefulFeatureFunction
     return true;
   }
 
-  void Evaluate(const Phrase &source
+  void EvaluateInIsolation(const Phrase &source
                 , const TargetPhrase &targetPhrase
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection &estimatedFutureScore) const {};
diff --git a/moses/FF/UnknownWordPenaltyProducer.h b/moses/FF/UnknownWordPenaltyProducer.h
index 3b48f4380f..93ae6d7ec1 100644
--- a/moses/FF/UnknownWordPenaltyProducer.h
+++ b/moses/FF/UnknownWordPenaltyProducer.h
@@ -44,7 +44,7 @@ class UnknownWordPenaltyProducer : public StatelessFeatureFunction
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection *estimatedFutureScore = NULL) const
   {}
-  void Evaluate(const Phrase &source
+  void EvaluateInIsolation(const Phrase &source
                 , const TargetPhrase &targetPhrase
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/FF/WordPenaltyProducer.cpp b/moses/FF/WordPenaltyProducer.cpp
index 6dea01b72f..1e191d0402 100644
--- a/moses/FF/WordPenaltyProducer.cpp
+++ b/moses/FF/WordPenaltyProducer.cpp
@@ -17,7 +17,7 @@ WordPenaltyProducer::WordPenaltyProducer(const std::string &line)
   s_instance = this;
 }
 
-void WordPenaltyProducer::Evaluate(const Phrase &source
+void WordPenaltyProducer::EvaluateInIsolation(const Phrase &source
                                    , const TargetPhrase &targetPhrase
                                    , ScoreComponentCollection &scoreBreakdown
                                    , ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/FF/WordPenaltyProducer.h b/moses/FF/WordPenaltyProducer.h
index ffd9216771..337ae2666e 100644
--- a/moses/FF/WordPenaltyProducer.h
+++ b/moses/FF/WordPenaltyProducer.h
@@ -27,7 +27,7 @@ class WordPenaltyProducer : public StatelessFeatureFunction
     return true;
   }
 
-  virtual void Evaluate(const Phrase &source
+  virtual void EvaluateInIsolation(const Phrase &source
                         , const TargetPhrase &targetPhrase
                         , ScoreComponentCollection &scoreBreakdown
                         , ScoreComponentCollection &estimatedFutureScore) const;
diff --git a/moses/FF/WordTranslationFeature.h b/moses/FF/WordTranslationFeature.h
index 072ba1d6ad..a264e2fe4b 100644
--- a/moses/FF/WordTranslationFeature.h
+++ b/moses/FF/WordTranslationFeature.h
@@ -60,7 +60,7 @@ class WordTranslationFeature : public StatelessFeatureFunction
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection *estimatedFutureScore = NULL) const
   {}
-  void Evaluate(const Phrase &source
+  void EvaluateInIsolation(const Phrase &source
                 , const TargetPhrase &targetPhrase
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/LM/Base.cpp b/moses/LM/Base.cpp
index f59b5e31b5..db71119d57 100644
--- a/moses/LM/Base.cpp
+++ b/moses/LM/Base.cpp
@@ -69,7 +69,7 @@ void LanguageModel::ReportHistoryOrder(std::ostream &out,const Phrase &phrase) c
   // out << "ReportHistoryOrder not implemented";
 }
 
-void LanguageModel::Evaluate(const Phrase &source
+void LanguageModel::EvaluateInIsolation(const Phrase &source
                              , const TargetPhrase &targetPhrase
                              , ScoreComponentCollection &scoreBreakdown
                              , ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/LM/Base.h b/moses/LM/Base.h
index abae5de241..ef148c8b6c 100644
--- a/moses/LM/Base.h
+++ b/moses/LM/Base.h
@@ -87,7 +87,7 @@ class LanguageModel : public StatefulFeatureFunction
   virtual void IncrementalCallback(Incremental::Manager &manager) const;
   virtual void ReportHistoryOrder(std::ostream &out,const Phrase &phrase) const;
 
-  virtual void Evaluate(const Phrase &source
+  virtual void EvaluateInIsolation(const Phrase &source
                         , const TargetPhrase &targetPhrase
                         , ScoreComponentCollection &scoreBreakdown
                         , ScoreComponentCollection &estimatedFutureScore) const;
diff --git a/moses/ScoreComponentCollectionTest.cpp b/moses/ScoreComponentCollectionTest.cpp
index de542d1f69..3e6fd57293 100644
--- a/moses/ScoreComponentCollectionTest.cpp
+++ b/moses/ScoreComponentCollectionTest.cpp
@@ -43,7 +43,7 @@ class MockStatelessFeatureFunction : public StatelessFeatureFunction
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection *estimatedFutureScore) const
   {}
-  void Evaluate(const Phrase &source
+  void EvaluateInIsolation(const Phrase &source
                 , const TargetPhrase &targetPhrase
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection &estimatedFutureScore) const
diff --git a/moses/TargetPhrase.cpp b/moses/TargetPhrase.cpp
index d356ab2ccc..2d9d1a06ee 100644
--- a/moses/TargetPhrase.cpp
+++ b/moses/TargetPhrase.cpp
@@ -129,7 +129,7 @@ void TargetPhrase::Evaluate(const Phrase &source, const std::vector<FeatureFunct
     for (size_t i = 0; i < ffs.size(); ++i) {
       const FeatureFunction &ff = *ffs[i];
       if (! staticData.IsFeatureFunctionIgnored( ff )) {
-        ff.Evaluate(source, *this, m_scoreBreakdown, futureScoreBreakdown);
+        ff.EvaluateInIsolation(source, *this, m_scoreBreakdown, futureScoreBreakdown);
       }
     }
 

From 587b5fd716f0cfc5207bbd790accd38ab1ae0ad6 Mon Sep 17 00:00:00 2001
From: Hieu Hoang <hieuhoang@gmail.com>
Date: Wed, 9 Jul 2014 23:06:54 +0100
Subject: [PATCH 37/84] rename Evaluate() to EvaluateWithSourceContext()

---
 moses/ChartTranslationOption.cpp               | 2 +-
 moses/FF/BleuScoreFeature.h                    | 2 +-
 moses/FF/ConstrainedDecoding.h                 | 2 +-
 moses/FF/ControlRecombination.h                | 2 +-
 moses/FF/CountNonTerms.h                       | 2 +-
 moses/FF/CoveredReferenceFeature.cpp           | 2 +-
 moses/FF/CoveredReferenceFeature.h             | 2 +-
 moses/FF/DecodeFeature.h                       | 2 +-
 moses/FF/DistortionScoreProducer.h             | 2 +-
 moses/FF/ExternalFeature.h                     | 2 +-
 moses/FF/FeatureFunction.h                     | 2 +-
 moses/FF/GlobalLexicalModel.h                  | 2 +-
 moses/FF/GlobalLexicalModelUnlimited.h         | 2 +-
 moses/FF/HyperParameterAsWeight.h              | 2 +-
 moses/FF/InputFeature.cpp                      | 2 +-
 moses/FF/InputFeature.h                        | 2 +-
 moses/FF/InternalStructStatelessFF.cpp         | 2 +-
 moses/FF/InternalStructStatelessFF.h           | 2 +-
 moses/FF/LexicalReordering/LexicalReordering.h | 2 +-
 moses/FF/MaxSpanFreeNonTermSource.cpp          | 2 +-
 moses/FF/MaxSpanFreeNonTermSource.h            | 2 +-
 moses/FF/NieceTerminal.cpp                     | 2 +-
 moses/FF/NieceTerminal.h                       | 2 +-
 moses/FF/OSM-Feature/OpSequenceModel.h         | 2 +-
 moses/FF/PhraseBoundaryFeature.h               | 2 +-
 moses/FF/PhraseLengthFeature.h                 | 2 +-
 moses/FF/PhrasePairFeature.h                   | 2 +-
 moses/FF/PhrasePenalty.h                       | 2 +-
 moses/FF/ReferenceComparison.h                 | 2 +-
 moses/FF/RuleScope.h                           | 2 +-
 moses/FF/SetSourcePhrase.h                     | 2 +-
 moses/FF/SkeletonStatefulFF.cpp                | 2 +-
 moses/FF/SkeletonStatefulFF.h                  | 2 +-
 moses/FF/SkeletonStatelessFF.cpp               | 2 +-
 moses/FF/SkeletonStatelessFF.h                 | 2 +-
 moses/FF/SoftMatchingFeature.h                 | 2 +-
 moses/FF/SourceGHKMTreeInputMatchFeature.cpp   | 2 +-
 moses/FF/SourceGHKMTreeInputMatchFeature.h     | 2 +-
 moses/FF/SourceWordDeletionFeature.h           | 2 +-
 moses/FF/SpanLength.cpp                        | 2 +-
 moses/FF/SpanLength.h                          | 2 +-
 moses/FF/SparseHieroReorderingFeature.h        | 2 +-
 moses/FF/SyntaxRHS.cpp                         | 2 +-
 moses/FF/SyntaxRHS.h                           | 2 +-
 moses/FF/TargetBigramFeature.h                 | 2 +-
 moses/FF/TargetNgramFeature.h                  | 2 +-
 moses/FF/TargetWordInsertionFeature.h          | 2 +-
 moses/FF/TreeStructureFeature.h                | 2 +-
 moses/FF/UnknownWordPenaltyProducer.h          | 2 +-
 moses/FF/WordPenaltyProducer.h                 | 2 +-
 moses/FF/WordTranslationFeature.h              | 2 +-
 moses/LM/Base.h                                | 2 +-
 moses/ScoreComponentCollectionTest.cpp         | 2 +-
 moses/TargetPhrase.cpp                         | 2 +-
 54 files changed, 54 insertions(+), 54 deletions(-)

diff --git a/moses/ChartTranslationOption.cpp b/moses/ChartTranslationOption.cpp
index 0fece0a093..daf1f89ce5 100644
--- a/moses/ChartTranslationOption.cpp
+++ b/moses/ChartTranslationOption.cpp
@@ -18,7 +18,7 @@ void ChartTranslationOption::Evaluate(const InputType &input,
 
   for (size_t i = 0; i < ffs.size(); ++i) {
     const FeatureFunction &ff = *ffs[i];
-    ff.Evaluate(input, inputPath, m_targetPhrase, &stackVec, m_scoreBreakdown);
+    ff.EvaluateWithSourceContext(input, inputPath, m_targetPhrase, &stackVec, m_scoreBreakdown);
   }
 }
 
diff --git a/moses/FF/BleuScoreFeature.h b/moses/FF/BleuScoreFeature.h
index e966ed56b6..cb974da207 100644
--- a/moses/FF/BleuScoreFeature.h
+++ b/moses/FF/BleuScoreFeature.h
@@ -121,7 +121,7 @@ class BleuScoreFeature : public StatefulFeatureFunction
   FFState* EvaluateChart(const ChartHypothesis& cur_hypo,
                          int featureID,
                          ScoreComponentCollection* accumulator) const;
-  void Evaluate(const InputType &input
+  void EvaluateWithSourceContext(const InputType &input
                 , const InputPath &inputPath
                 , const TargetPhrase &targetPhrase
                 , const StackVec *stackVec
diff --git a/moses/FF/ConstrainedDecoding.h b/moses/FF/ConstrainedDecoding.h
index f9c495c6f0..21d8a69c0a 100644
--- a/moses/FF/ConstrainedDecoding.h
+++ b/moses/FF/ConstrainedDecoding.h
@@ -47,7 +47,7 @@ class ConstrainedDecoding : public StatefulFeatureFunction
                 , ScoreComponentCollection &estimatedFutureScore) const
   {}
   
-    void Evaluate(const InputType &input
+  void EvaluateWithSourceContext(const InputType &input
                 , const InputPath &inputPath
                 , const TargetPhrase &targetPhrase
                 , const StackVec *stackVec
diff --git a/moses/FF/ControlRecombination.h b/moses/FF/ControlRecombination.h
index 8bfc7ce526..c35714d543 100644
--- a/moses/FF/ControlRecombination.h
+++ b/moses/FF/ControlRecombination.h
@@ -62,7 +62,7 @@ class ControlRecombination : public StatefulFeatureFunction
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection &estimatedFutureScore) const
   {}
-  void Evaluate(const InputType &input
+  void EvaluateWithSourceContext(const InputType &input
                 , const InputPath &inputPath
                 , const TargetPhrase &targetPhrase
                 , const StackVec *stackVec
diff --git a/moses/FF/CountNonTerms.h b/moses/FF/CountNonTerms.h
index 754e88b4a6..1b078978c5 100644
--- a/moses/FF/CountNonTerms.h
+++ b/moses/FF/CountNonTerms.h
@@ -17,7 +17,7 @@ class CountNonTerms : public StatelessFeatureFunction
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection &estimatedFutureScore) const;
 
-  void Evaluate(const InputType &input
+  void EvaluateWithSourceContext(const InputType &input
                 , const InputPath &inputPath
                 , const TargetPhrase &targetPhrase
                 , const StackVec *stackVec
diff --git a/moses/FF/CoveredReferenceFeature.cpp b/moses/FF/CoveredReferenceFeature.cpp
index a38031d7e8..81c19dc4bf 100644
--- a/moses/FF/CoveredReferenceFeature.cpp
+++ b/moses/FF/CoveredReferenceFeature.cpp
@@ -46,7 +46,7 @@ void CoveredReferenceFeature::EvaluateInIsolation(const Phrase &source
                                   , ScoreComponentCollection &estimatedFutureScore) const
 {}
 
-void CoveredReferenceFeature::Evaluate(const InputType &input
+void CoveredReferenceFeature::EvaluateWithSourceContext(const InputType &input
                                   , const InputPath &inputPath
                                   , const TargetPhrase &targetPhrase
                                   , const StackVec *stackVec
diff --git a/moses/FF/CoveredReferenceFeature.h b/moses/FF/CoveredReferenceFeature.h
index 510490e76c..b1c77d4e31 100644
--- a/moses/FF/CoveredReferenceFeature.h
+++ b/moses/FF/CoveredReferenceFeature.h
@@ -56,7 +56,7 @@ class CoveredReferenceFeature : public StatefulFeatureFunction
                 , const TargetPhrase &targetPhrase
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection &estimatedFutureScore) const;
-  void Evaluate(const InputType &input
+  void EvaluateWithSourceContext(const InputType &input
                 , const InputPath &inputPath
                 , const TargetPhrase &targetPhrase
                 , const StackVec *stackVec
diff --git a/moses/FF/DecodeFeature.h b/moses/FF/DecodeFeature.h
index 393f9c87a7..27906fa872 100644
--- a/moses/FF/DecodeFeature.h
+++ b/moses/FF/DecodeFeature.h
@@ -68,7 +68,7 @@ class DecodeFeature : public StatelessFeatureFunction
   void EvaluateChart(const ChartHypothesis &hypo,
                      ScoreComponentCollection* accumulator) const
   {}
-  void Evaluate(const InputType &input
+  void EvaluateWithSourceContext(const InputType &input
                 , const InputPath &inputPath
                 , const TargetPhrase &targetPhrase
                 , const StackVec *stackVec
diff --git a/moses/FF/DistortionScoreProducer.h b/moses/FF/DistortionScoreProducer.h
index 0551b9ae8a..bc979d231b 100644
--- a/moses/FF/DistortionScoreProducer.h
+++ b/moses/FF/DistortionScoreProducer.h
@@ -40,7 +40,7 @@ class DistortionScoreProducer : public StatefulFeatureFunction
     throw std::logic_error("DistortionScoreProducer not supported in chart decoder, yet");
   }
 
-  void Evaluate(const InputType &input
+  void EvaluateWithSourceContext(const InputType &input
                 , const InputPath &inputPath
                 , const TargetPhrase &targetPhrase
                 , const StackVec *stackVec
diff --git a/moses/FF/ExternalFeature.h b/moses/FF/ExternalFeature.h
index d2eeb8cd04..b2be498d45 100644
--- a/moses/FF/ExternalFeature.h
+++ b/moses/FF/ExternalFeature.h
@@ -56,7 +56,7 @@ class ExternalFeature : public StatefulFeatureFunction
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection &estimatedFutureScore) const
   {}
-  void Evaluate(const InputType &input
+  void EvaluateWithSourceContext(const InputType &input
                 , const InputPath &inputPath
                 , const TargetPhrase &targetPhrase
                 , const StackVec *stackVec
diff --git a/moses/FF/FeatureFunction.h b/moses/FF/FeatureFunction.h
index edfd57c92e..42ac129745 100644
--- a/moses/FF/FeatureFunction.h
+++ b/moses/FF/FeatureFunction.h
@@ -110,7 +110,7 @@ class FeatureFunction
   // It is guaranteed to be in the same order as the non-terms in the source phrase.
   // For pb models, stackvec is NULL.
   // No FF should set estimatedFutureScore in both overloads!
-  virtual void Evaluate(const InputType &input
+  virtual void EvaluateWithSourceContext(const InputType &input
                         , const InputPath &inputPath
                         , const TargetPhrase &targetPhrase
                         , const StackVec *stackVec
diff --git a/moses/FF/GlobalLexicalModel.h b/moses/FF/GlobalLexicalModel.h
index 9418d1b152..16963117b0 100644
--- a/moses/FF/GlobalLexicalModel.h
+++ b/moses/FF/GlobalLexicalModel.h
@@ -80,7 +80,7 @@ class GlobalLexicalModel : public StatelessFeatureFunction
     throw std::logic_error("GlobalLexicalModel not supported in chart decoder, yet");
   }
 
-  void Evaluate(const InputType &input
+  void EvaluateWithSourceContext(const InputType &input
                 , const InputPath &inputPath
                 , const TargetPhrase &targetPhrase
                 , const StackVec *stackVec
diff --git a/moses/FF/GlobalLexicalModelUnlimited.h b/moses/FF/GlobalLexicalModelUnlimited.h
index 167b80238f..cc7bd17e96 100644
--- a/moses/FF/GlobalLexicalModelUnlimited.h
+++ b/moses/FF/GlobalLexicalModelUnlimited.h
@@ -90,7 +90,7 @@ class GlobalLexicalModelUnlimited : public StatelessFeatureFunction
     throw std::logic_error("GlobalLexicalModelUnlimited not supported in chart decoder, yet");
   }
 
-  void Evaluate(const InputType &input
+  void EvaluateWithSourceContext(const InputType &input
                 , const InputPath &inputPath
                 , const TargetPhrase &targetPhrase
                 , const StackVec *stackVec
diff --git a/moses/FF/HyperParameterAsWeight.h b/moses/FF/HyperParameterAsWeight.h
index 5c3189b20e..e35e610245 100644
--- a/moses/FF/HyperParameterAsWeight.h
+++ b/moses/FF/HyperParameterAsWeight.h
@@ -23,7 +23,7 @@ class HyperParameterAsWeight : public StatelessFeatureFunction
                         , ScoreComponentCollection &estimatedFutureScore) const
   {}
 
-  virtual void Evaluate(const InputType &input
+  virtual void EvaluateWithSourceContext(const InputType &input
                         , const InputPath &inputPath
                         , const TargetPhrase &targetPhrase
                         , const StackVec *stackVec
diff --git a/moses/FF/InputFeature.cpp b/moses/FF/InputFeature.cpp
index 0fa2005d1b..61753c5951 100644
--- a/moses/FF/InputFeature.cpp
+++ b/moses/FF/InputFeature.cpp
@@ -44,7 +44,7 @@ void InputFeature::SetParameter(const std::string& key, const std::string& value
 
 }
 
-void InputFeature::Evaluate(const InputType &input
+void InputFeature::EvaluateWithSourceContext(const InputType &input
                             , const InputPath &inputPath
                             , const TargetPhrase &targetPhrase
                             , const StackVec *stackVec
diff --git a/moses/FF/InputFeature.h b/moses/FF/InputFeature.h
index c8ad61ffe3..2c83a958c2 100644
--- a/moses/FF/InputFeature.h
+++ b/moses/FF/InputFeature.h
@@ -47,7 +47,7 @@ class InputFeature : public StatelessFeatureFunction
                 , ScoreComponentCollection &estimatedFutureScore) const
   {}
 
-  void Evaluate(const InputType &input
+  void EvaluateWithSourceContext(const InputType &input
                 , const InputPath &inputPath
                 , const TargetPhrase &targetPhrase
                 , const StackVec *stackVec
diff --git a/moses/FF/InternalStructStatelessFF.cpp b/moses/FF/InternalStructStatelessFF.cpp
index 05b94b87a6..a050bd8ef9 100644
--- a/moses/FF/InternalStructStatelessFF.cpp
+++ b/moses/FF/InternalStructStatelessFF.cpp
@@ -15,7 +15,7 @@ void InternalStructStatelessFF::EvaluateInIsolation(const Phrase &source
 
 }
 
-void InternalStructStatelessFF::Evaluate(const InputType &input
+void InternalStructStatelessFF::EvaluateWithSourceContext(const InputType &input
 	                        , const InputPath &inputPath
 	                        , const TargetPhrase &targetPhrase
 	                        , const StackVec *stackVec
diff --git a/moses/FF/InternalStructStatelessFF.h b/moses/FF/InternalStructStatelessFF.h
index d7a9a0961f..2d1258bd76 100644
--- a/moses/FF/InternalStructStatelessFF.h
+++ b/moses/FF/InternalStructStatelessFF.h
@@ -21,7 +21,7 @@ class InternalStructStatelessFF : public StatelessFeatureFunction
 	                        , ScoreComponentCollection &scoreBreakdown
 	                        , ScoreComponentCollection &estimatedFutureScore) const;
 
-	void Evaluate(const InputType &input
+	void EvaluateWithSourceContext(const InputType &input
 	                        , const InputPath &inputPath
 	                        , const TargetPhrase &targetPhrase
 	                        , const StackVec *stackVec
diff --git a/moses/FF/LexicalReordering/LexicalReordering.h b/moses/FF/LexicalReordering/LexicalReordering.h
index 66f202126e..1071ebfd68 100644
--- a/moses/FF/LexicalReordering/LexicalReordering.h
+++ b/moses/FF/LexicalReordering/LexicalReordering.h
@@ -54,7 +54,7 @@ class LexicalReordering : public StatefulFeatureFunction
                                  ScoreComponentCollection*) const {
     UTIL_THROW(util::Exception, "LexicalReordering is not valid for chart decoder");
   }
-  void Evaluate(const InputType &input
+  void EvaluateWithSourceContext(const InputType &input
                 , const InputPath &inputPath
                 , const TargetPhrase &targetPhrase
                 , const StackVec *stackVec
diff --git a/moses/FF/MaxSpanFreeNonTermSource.cpp b/moses/FF/MaxSpanFreeNonTermSource.cpp
index 7165e96f5b..9de5826358 100644
--- a/moses/FF/MaxSpanFreeNonTermSource.cpp
+++ b/moses/FF/MaxSpanFreeNonTermSource.cpp
@@ -35,7 +35,7 @@ void MaxSpanFreeNonTermSource::EvaluateInIsolation(const Phrase &source
   targetPhrase.SetRuleSource(source);
 }
 
-void MaxSpanFreeNonTermSource::Evaluate(const InputType &input
+void MaxSpanFreeNonTermSource::EvaluateWithSourceContext(const InputType &input
                        , const InputPath &inputPath
                        , const TargetPhrase &targetPhrase
                        , const StackVec *stackVec
diff --git a/moses/FF/MaxSpanFreeNonTermSource.h b/moses/FF/MaxSpanFreeNonTermSource.h
index 30f1df02cc..f0d0e34e65 100644
--- a/moses/FF/MaxSpanFreeNonTermSource.h
+++ b/moses/FF/MaxSpanFreeNonTermSource.h
@@ -20,7 +20,7 @@ class MaxSpanFreeNonTermSource : public StatelessFeatureFunction
 							, ScoreComponentCollection &scoreBreakdown
 							, ScoreComponentCollection &estimatedFutureScore) const;
 
-	  virtual void Evaluate(const InputType &input
+	  virtual void EvaluateWithSourceContext(const InputType &input
 	                         , const InputPath &inputPath
 	                         , const TargetPhrase &targetPhrase
 	                         , const StackVec *stackVec
diff --git a/moses/FF/NieceTerminal.cpp b/moses/FF/NieceTerminal.cpp
index 6299d9e08c..c8b62ea29c 100644
--- a/moses/FF/NieceTerminal.cpp
+++ b/moses/FF/NieceTerminal.cpp
@@ -33,7 +33,7 @@ void NieceTerminal::EvaluateInIsolation(const Phrase &source
   targetPhrase.SetRuleSource(source);
 }
 
-void NieceTerminal::Evaluate(const InputType &input
+void NieceTerminal::EvaluateWithSourceContext(const InputType &input
                                    , const InputPath &inputPath
                                    , const TargetPhrase &targetPhrase
                                    , const StackVec *stackVec
diff --git a/moses/FF/NieceTerminal.h b/moses/FF/NieceTerminal.h
index efa471c001..0953be44fb 100644
--- a/moses/FF/NieceTerminal.h
+++ b/moses/FF/NieceTerminal.h
@@ -23,7 +23,7 @@ class NieceTerminal : public StatelessFeatureFunction
                 , const TargetPhrase &targetPhrase
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection &estimatedFutureScore) const;
-  void Evaluate(const InputType &input
+  void EvaluateWithSourceContext(const InputType &input
                 , const InputPath &inputPath
                 , const TargetPhrase &targetPhrase
                 , const StackVec *stackVec
diff --git a/moses/FF/OSM-Feature/OpSequenceModel.h b/moses/FF/OSM-Feature/OpSequenceModel.h
index 0a670cc42e..94fa6899de 100644
--- a/moses/FF/OSM-Feature/OpSequenceModel.h
+++ b/moses/FF/OSM-Feature/OpSequenceModel.h
@@ -39,7 +39,7 @@ class OpSequenceModel : public StatefulFeatureFunction
     int /* featureID - used to index the state in the previous hypotheses */,
     ScoreComponentCollection* accumulator) const;
 
-  void Evaluate(const InputType &input
+  void EvaluateWithSourceContext(const InputType &input
                 , const InputPath &inputPath
                 , const TargetPhrase &targetPhrase
                 , const StackVec *stackVec
diff --git a/moses/FF/PhraseBoundaryFeature.h b/moses/FF/PhraseBoundaryFeature.h
index 56ccda7afe..33bf43cb0b 100644
--- a/moses/FF/PhraseBoundaryFeature.h
+++ b/moses/FF/PhraseBoundaryFeature.h
@@ -53,7 +53,7 @@ class PhraseBoundaryFeature : public StatefulFeatureFunction
     throw std::logic_error("PhraseBoundaryState not supported in chart decoder, yet");
   }
 
-  void Evaluate(const InputType &input
+  void EvaluateWithSourceContext(const InputType &input
                 , const InputPath &inputPath
                 , const TargetPhrase &targetPhrase
                 , const StackVec *stackVec
diff --git a/moses/FF/PhraseLengthFeature.h b/moses/FF/PhraseLengthFeature.h
index 95640b12f2..f154a2ef60 100644
--- a/moses/FF/PhraseLengthFeature.h
+++ b/moses/FF/PhraseLengthFeature.h
@@ -33,7 +33,7 @@ class PhraseLengthFeature : public StatelessFeatureFunction
     throw std::logic_error("PhraseLengthFeature not valid in chart decoder");
   }
 
-  void Evaluate(const InputType &input
+  void EvaluateWithSourceContext(const InputType &input
                 , const InputPath &inputPath
                 , const TargetPhrase &targetPhrase
                 , const StackVec *stackVec
diff --git a/moses/FF/PhrasePairFeature.h b/moses/FF/PhrasePairFeature.h
index ce4822f2f8..8d2a6a659b 100644
--- a/moses/FF/PhrasePairFeature.h
+++ b/moses/FF/PhrasePairFeature.h
@@ -45,7 +45,7 @@ class PhrasePairFeature: public StatelessFeatureFunction
     throw std::logic_error("PhrasePairFeature not valid in chart decoder");
   }
 
-  void Evaluate(const InputType &input
+  void EvaluateWithSourceContext(const InputType &input
                 , const InputPath &inputPath
                 , const TargetPhrase &targetPhrase
                 , const StackVec *stackVec
diff --git a/moses/FF/PhrasePenalty.h b/moses/FF/PhrasePenalty.h
index 09e82db055..2002f88a3e 100644
--- a/moses/FF/PhrasePenalty.h
+++ b/moses/FF/PhrasePenalty.h
@@ -26,7 +26,7 @@ class PhrasePenalty : public StatelessFeatureFunction
                      ScoreComponentCollection* accumulator) const
   {}
 
-  void Evaluate(const InputType &input
+  void EvaluateWithSourceContext(const InputType &input
                 , const InputPath &inputPath
                 , const TargetPhrase &targetPhrase
                 , const StackVec *stackVec
diff --git a/moses/FF/ReferenceComparison.h b/moses/FF/ReferenceComparison.h
index 417d38ec44..7235aa5884 100644
--- a/moses/FF/ReferenceComparison.h
+++ b/moses/FF/ReferenceComparison.h
@@ -21,7 +21,7 @@ class ReferenceComparison : public StatelessFeatureFunction
 							, ScoreComponentCollection &estimatedFutureScore) const
 	  {}
 
-	  virtual void Evaluate(const InputType &input
+	  virtual void EvaluateWithSourceContext(const InputType &input
 	                         , const InputPath &inputPath
 	                         , const TargetPhrase &targetPhrase
 	                         , const StackVec *stackVec
diff --git a/moses/FF/RuleScope.h b/moses/FF/RuleScope.h
index 53334e789f..0aa0552856 100644
--- a/moses/FF/RuleScope.h
+++ b/moses/FF/RuleScope.h
@@ -19,7 +19,7 @@ class RuleScope : public StatelessFeatureFunction
 							, ScoreComponentCollection &scoreBreakdown
 							, ScoreComponentCollection &estimatedFutureScore) const;
 
-	  virtual void Evaluate(const InputType &input
+	  virtual void EvaluateWithSourceContext(const InputType &input
 	                         , const InputPath &inputPath
 	                         , const TargetPhrase &targetPhrase
 	                         , const StackVec *stackVec
diff --git a/moses/FF/SetSourcePhrase.h b/moses/FF/SetSourcePhrase.h
index dfc480f9e3..c78fe14fae 100644
--- a/moses/FF/SetSourcePhrase.h
+++ b/moses/FF/SetSourcePhrase.h
@@ -19,7 +19,7 @@ class SetSourcePhrase : public StatelessFeatureFunction
 						, ScoreComponentCollection &scoreBreakdown
 						, ScoreComponentCollection &estimatedFutureScore) const;
 
-  virtual void Evaluate(const InputType &input
+  virtual void EvaluateWithSourceContext(const InputType &input
                          , const InputPath &inputPath
                          , const TargetPhrase &targetPhrase
                          , const StackVec *stackVec
diff --git a/moses/FF/SkeletonStatefulFF.cpp b/moses/FF/SkeletonStatefulFF.cpp
index 85df270e2c..3c7eb249c1 100644
--- a/moses/FF/SkeletonStatefulFF.cpp
+++ b/moses/FF/SkeletonStatefulFF.cpp
@@ -29,7 +29,7 @@ void SkeletonStatefulFF::EvaluateInIsolation(const Phrase &source
                                   , ScoreComponentCollection &estimatedFutureScore) const
 {}
 
-void SkeletonStatefulFF::Evaluate(const InputType &input
+void SkeletonStatefulFF::EvaluateWithSourceContext(const InputType &input
                                   , const InputPath &inputPath
                                   , const TargetPhrase &targetPhrase
                                   , const StackVec *stackVec
diff --git a/moses/FF/SkeletonStatefulFF.h b/moses/FF/SkeletonStatefulFF.h
index 448f1ed0e8..c88381cefc 100644
--- a/moses/FF/SkeletonStatefulFF.h
+++ b/moses/FF/SkeletonStatefulFF.h
@@ -34,7 +34,7 @@ class SkeletonStatefulFF : public StatefulFeatureFunction
                 , const TargetPhrase &targetPhrase
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection &estimatedFutureScore) const;
-  void Evaluate(const InputType &input
+  void EvaluateWithSourceContext(const InputType &input
                 , const InputPath &inputPath
                 , const TargetPhrase &targetPhrase
                 , const StackVec *stackVec
diff --git a/moses/FF/SkeletonStatelessFF.cpp b/moses/FF/SkeletonStatelessFF.cpp
index e032063b3c..6c6193372a 100644
--- a/moses/FF/SkeletonStatelessFF.cpp
+++ b/moses/FF/SkeletonStatelessFF.cpp
@@ -29,7 +29,7 @@ void SkeletonStatelessFF::EvaluateInIsolation(const Phrase &source
 
 }
 
-void SkeletonStatelessFF::Evaluate(const InputType &input
+void SkeletonStatelessFF::EvaluateWithSourceContext(const InputType &input
                                    , const InputPath &inputPath
                                    , const TargetPhrase &targetPhrase
                                    , const StackVec *stackVec
diff --git a/moses/FF/SkeletonStatelessFF.h b/moses/FF/SkeletonStatelessFF.h
index 9e9b4bdfde..4b88fd2136 100644
--- a/moses/FF/SkeletonStatelessFF.h
+++ b/moses/FF/SkeletonStatelessFF.h
@@ -19,7 +19,7 @@ class SkeletonStatelessFF : public StatelessFeatureFunction
                 , const TargetPhrase &targetPhrase
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection &estimatedFutureScore) const;
-  void Evaluate(const InputType &input
+  void EvaluateWithSourceContext(const InputType &input
                 , const InputPath &inputPath
                 , const TargetPhrase &targetPhrase
                 , const StackVec *stackVec
diff --git a/moses/FF/SoftMatchingFeature.h b/moses/FF/SoftMatchingFeature.h
index 542c9d459d..110fc87bb7 100644
--- a/moses/FF/SoftMatchingFeature.h
+++ b/moses/FF/SoftMatchingFeature.h
@@ -26,7 +26,7 @@ class SoftMatchingFeature : public StatelessFeatureFunction
                 , const TargetPhrase &targetPhrase
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection &estimatedFutureScore) const {};
-  void Evaluate(const InputType &input
+  void EvaluateWithSourceContext(const InputType &input
                 , const InputPath &inputPath
                 , const TargetPhrase &targetPhrase
                 , const StackVec *stackVec
diff --git a/moses/FF/SourceGHKMTreeInputMatchFeature.cpp b/moses/FF/SourceGHKMTreeInputMatchFeature.cpp
index 0dbb3a7be1..38238b10c0 100644
--- a/moses/FF/SourceGHKMTreeInputMatchFeature.cpp
+++ b/moses/FF/SourceGHKMTreeInputMatchFeature.cpp
@@ -32,7 +32,7 @@ void SourceGHKMTreeInputMatchFeature::SetParameter(const std::string& key, const
 }
 
 // assumes that source-side syntax labels are stored in the target non-terminal field of the rules
-void SourceGHKMTreeInputMatchFeature::Evaluate(const InputType &input
+void SourceGHKMTreeInputMatchFeature::EvaluateWithSourceContext(const InputType &input
                                                , const InputPath &inputPath
                                                , const TargetPhrase &targetPhrase
                                                , const StackVec *stackVec
diff --git a/moses/FF/SourceGHKMTreeInputMatchFeature.h b/moses/FF/SourceGHKMTreeInputMatchFeature.h
index b910d54b63..80ce6af90e 100644
--- a/moses/FF/SourceGHKMTreeInputMatchFeature.h
+++ b/moses/FF/SourceGHKMTreeInputMatchFeature.h
@@ -22,7 +22,7 @@ class SourceGHKMTreeInputMatchFeature : public StatelessFeatureFunction
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection &estimatedFutureScore) const {};
 
-  void Evaluate(const InputType &input
+  void EvaluateWithSourceContext(const InputType &input
                 , const InputPath &inputPath
                 , const TargetPhrase &targetPhrase
                 , const StackVec *stackVec
diff --git a/moses/FF/SourceWordDeletionFeature.h b/moses/FF/SourceWordDeletionFeature.h
index bd1ddb2398..cba5ec0043 100644
--- a/moses/FF/SourceWordDeletionFeature.h
+++ b/moses/FF/SourceWordDeletionFeature.h
@@ -32,7 +32,7 @@ class SourceWordDeletionFeature : public StatelessFeatureFunction
                 , const TargetPhrase &targetPhrase
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection &estimatedFutureScore) const;
-  void Evaluate(const InputType &input
+  void EvaluateWithSourceContext(const InputType &input
                 , const InputPath &inputPath
                 , const TargetPhrase &targetPhrase
                 , const StackVec *stackVec
diff --git a/moses/FF/SpanLength.cpp b/moses/FF/SpanLength.cpp
index 966aa0b944..7a7c87be83 100644
--- a/moses/FF/SpanLength.cpp
+++ b/moses/FF/SpanLength.cpp
@@ -29,7 +29,7 @@ void SpanLength::EvaluateInIsolation(const Phrase &source
   targetPhrase.SetRuleSource(source);
 }
 
-void SpanLength::Evaluate(const InputType &input
+void SpanLength::EvaluateWithSourceContext(const InputType &input
                                    , const InputPath &inputPath
                                    , const TargetPhrase &targetPhrase
                                    , const StackVec *stackVec
diff --git a/moses/FF/SpanLength.h b/moses/FF/SpanLength.h
index caa6878b8c..7b3726a0eb 100644
--- a/moses/FF/SpanLength.h
+++ b/moses/FF/SpanLength.h
@@ -19,7 +19,7 @@ class SpanLength : public StatelessFeatureFunction
 							, ScoreComponentCollection &scoreBreakdown
 							, ScoreComponentCollection &estimatedFutureScore) const;
 
-	  virtual void Evaluate(const InputType &input
+	  virtual void EvaluateWithSourceContext(const InputType &input
 	                         , const InputPath &inputPath
 	                         , const TargetPhrase &targetPhrase
 	                         , const StackVec *stackVec
diff --git a/moses/FF/SparseHieroReorderingFeature.h b/moses/FF/SparseHieroReorderingFeature.h
index 82b9890741..b0f20d7f78 100644
--- a/moses/FF/SparseHieroReorderingFeature.h
+++ b/moses/FF/SparseHieroReorderingFeature.h
@@ -36,7 +36,7 @@ class SparseHieroReorderingFeature : public StatelessFeatureFunction
 	                        , ScoreComponentCollection &scoreBreakdown
 	                        , ScoreComponentCollection &estimatedFutureScore) const
 	{}
-  virtual void Evaluate(const InputType &input
+  virtual void EvaluateWithSourceContext(const InputType &input
                         , const InputPath &inputPath
                         , const TargetPhrase &targetPhrase
                         , const StackVec *stackVec
diff --git a/moses/FF/SyntaxRHS.cpp b/moses/FF/SyntaxRHS.cpp
index abcff2c3be..e168ff4dde 100644
--- a/moses/FF/SyntaxRHS.cpp
+++ b/moses/FF/SyntaxRHS.cpp
@@ -21,7 +21,7 @@ void SyntaxRHS::EvaluateInIsolation(const Phrase &source
 {
 }
 
-void SyntaxRHS::Evaluate(const InputType &input
+void SyntaxRHS::EvaluateWithSourceContext(const InputType &input
                                    , const InputPath &inputPath
                                    , const TargetPhrase &targetPhrase
                                    , const StackVec *stackVec
diff --git a/moses/FF/SyntaxRHS.h b/moses/FF/SyntaxRHS.h
index 9a59597ba1..fedeac7246 100644
--- a/moses/FF/SyntaxRHS.h
+++ b/moses/FF/SyntaxRHS.h
@@ -19,7 +19,7 @@ class SyntaxRHS : public StatelessFeatureFunction
                 , const TargetPhrase &targetPhrase
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection &estimatedFutureScore) const;
-  void Evaluate(const InputType &input
+  void EvaluateWithSourceContext(const InputType &input
                 , const InputPath &inputPath
                 , const TargetPhrase &targetPhrase
                 , const StackVec *stackVec
diff --git a/moses/FF/TargetBigramFeature.h b/moses/FF/TargetBigramFeature.h
index 8c600ab3ab..6d4170a8a3 100644
--- a/moses/FF/TargetBigramFeature.h
+++ b/moses/FF/TargetBigramFeature.h
@@ -47,7 +47,7 @@ class TargetBigramFeature : public StatefulFeatureFunction
                                   ScoreComponentCollection* ) const {
     throw std::logic_error("TargetBigramFeature not valid in chart decoder");
   }
-  void Evaluate(const InputType &input
+  void EvaluateWithSourceContext(const InputType &input
                 , const InputPath &inputPath
                 , const TargetPhrase &targetPhrase
                 , const StackVec *stackVec
diff --git a/moses/FF/TargetNgramFeature.h b/moses/FF/TargetNgramFeature.h
index 7ea236d9d8..a1da40d32c 100644
--- a/moses/FF/TargetNgramFeature.h
+++ b/moses/FF/TargetNgramFeature.h
@@ -192,7 +192,7 @@ class TargetNgramFeature : public StatefulFeatureFunction
   virtual FFState* EvaluateChart(const ChartHypothesis& cur_hypo, int featureId,
                                  ScoreComponentCollection* accumulator) const;
 
-  void Evaluate(const InputType &input
+  void EvaluateWithSourceContext(const InputType &input
                 , const InputPath &inputPath
                 , const TargetPhrase &targetPhrase
                 , const StackVec *stackVec
diff --git a/moses/FF/TargetWordInsertionFeature.h b/moses/FF/TargetWordInsertionFeature.h
index 58ea10a4bd..3a9230c9da 100644
--- a/moses/FF/TargetWordInsertionFeature.h
+++ b/moses/FF/TargetWordInsertionFeature.h
@@ -32,7 +32,7 @@ class TargetWordInsertionFeature : public StatelessFeatureFunction
                         , const TargetPhrase &targetPhrase
                         , ScoreComponentCollection &scoreBreakdown
                         , ScoreComponentCollection &estimatedFutureScore) const;
-  void Evaluate(const InputType &input
+  void EvaluateWithSourceContext(const InputType &input
                 , const InputPath &inputPath
                 , const TargetPhrase &targetPhrase
                 , const StackVec *stackVec
diff --git a/moses/FF/TreeStructureFeature.h b/moses/FF/TreeStructureFeature.h
index f422c4a878..fa915f9164 100644
--- a/moses/FF/TreeStructureFeature.h
+++ b/moses/FF/TreeStructureFeature.h
@@ -156,7 +156,7 @@ class TreeStructureFeature : public StatefulFeatureFunction
                 , const TargetPhrase &targetPhrase
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection &estimatedFutureScore) const {};
-  void Evaluate(const InputType &input
+  void EvaluateWithSourceContext(const InputType &input
                 , const InputPath &inputPath
                 , const TargetPhrase &targetPhrase
                 , const StackVec *stackVec
diff --git a/moses/FF/UnknownWordPenaltyProducer.h b/moses/FF/UnknownWordPenaltyProducer.h
index 93ae6d7ec1..5a741db570 100644
--- a/moses/FF/UnknownWordPenaltyProducer.h
+++ b/moses/FF/UnknownWordPenaltyProducer.h
@@ -37,7 +37,7 @@ class UnknownWordPenaltyProducer : public StatelessFeatureFunction
   void EvaluateChart(const ChartHypothesis &hypo,
                      ScoreComponentCollection* accumulator) const
   {}
-  void Evaluate(const InputType &input
+  void EvaluateWithSourceContext(const InputType &input
                 , const InputPath &inputPath
                 , const TargetPhrase &targetPhrase
                 , const StackVec *stackVec
diff --git a/moses/FF/WordPenaltyProducer.h b/moses/FF/WordPenaltyProducer.h
index 337ae2666e..594610344d 100644
--- a/moses/FF/WordPenaltyProducer.h
+++ b/moses/FF/WordPenaltyProducer.h
@@ -37,7 +37,7 @@ class WordPenaltyProducer : public StatelessFeatureFunction
   void EvaluateChart(const ChartHypothesis &hypo,
                      ScoreComponentCollection* accumulator) const
   {}
-  void Evaluate(const InputType &input
+  void EvaluateWithSourceContext(const InputType &input
                 , const InputPath &inputPath
                 , const TargetPhrase &targetPhrase
                 , const StackVec *stackVec
diff --git a/moses/FF/WordTranslationFeature.h b/moses/FF/WordTranslationFeature.h
index a264e2fe4b..91dce3130b 100644
--- a/moses/FF/WordTranslationFeature.h
+++ b/moses/FF/WordTranslationFeature.h
@@ -53,7 +53,7 @@ class WordTranslationFeature : public StatelessFeatureFunction
 
   void EvaluateChart(const ChartHypothesis &hypo,
                      ScoreComponentCollection* accumulator) const;
-  void Evaluate(const InputType &input
+  void EvaluateWithSourceContext(const InputType &input
                 , const InputPath &inputPath
                 , const TargetPhrase &targetPhrase
                 , const StackVec *stackVec
diff --git a/moses/LM/Base.h b/moses/LM/Base.h
index ef148c8b6c..2be19e5bd4 100644
--- a/moses/LM/Base.h
+++ b/moses/LM/Base.h
@@ -91,7 +91,7 @@ class LanguageModel : public StatefulFeatureFunction
                         , const TargetPhrase &targetPhrase
                         , ScoreComponentCollection &scoreBreakdown
                         , ScoreComponentCollection &estimatedFutureScore) const;
-  void Evaluate(const InputType &input
+  void EvaluateWithSourceContext(const InputType &input
                 , const InputPath &inputPath
                 , const TargetPhrase &targetPhrase
                 , const StackVec *stackVec
diff --git a/moses/ScoreComponentCollectionTest.cpp b/moses/ScoreComponentCollectionTest.cpp
index 3e6fd57293..719e05e7d3 100644
--- a/moses/ScoreComponentCollectionTest.cpp
+++ b/moses/ScoreComponentCollectionTest.cpp
@@ -36,7 +36,7 @@ class MockStatelessFeatureFunction : public StatelessFeatureFunction
     StatelessFeatureFunction(n, line) {}
   void Evaluate(const Hypothesis&, ScoreComponentCollection*) const {}
   void EvaluateChart(const ChartHypothesis&, ScoreComponentCollection*) const {}
-  void Evaluate(const InputType &input
+  void EvaluateWithSourceContext(const InputType &input
                 , const InputPath &inputPath
                 , const TargetPhrase &targetPhrase
                 , const StackVec *stackVec
diff --git a/moses/TargetPhrase.cpp b/moses/TargetPhrase.cpp
index 2d9d1a06ee..aef4f0feeb 100644
--- a/moses/TargetPhrase.cpp
+++ b/moses/TargetPhrase.cpp
@@ -148,7 +148,7 @@ void TargetPhrase::Evaluate(const InputType &input, const InputPath &inputPath)
   for (size_t i = 0; i < ffs.size(); ++i) {
     const FeatureFunction &ff = *ffs[i];
     if (! staticData.IsFeatureFunctionIgnored( ff )) {
-      ff.Evaluate(input, inputPath, *this, NULL, m_scoreBreakdown, &futureScoreBreakdown);
+      ff.EvaluateWithSourceContext(input, inputPath, *this, NULL, m_scoreBreakdown, &futureScoreBreakdown);
     }
   }
   float weightedScore = m_scoreBreakdown.GetWeightedScore();

From 12a14221e283e6e1d69f14d35437f36b9a728e50 Mon Sep 17 00:00:00 2001
From: Hieu Hoang <hieuhoang@gmail.com>
Date: Wed, 9 Jul 2014 23:41:08 +0100
Subject: [PATCH 38/84] rename Evaluate() to EvaluateWhenApplied()

---
 moses/FF/BleuScoreFeature.cpp                    | 2 +-
 moses/FF/BleuScoreFeature.h                      | 2 +-
 moses/FF/ConstrainedDecoding.cpp                 | 2 +-
 moses/FF/ConstrainedDecoding.h                   | 2 +-
 moses/FF/ControlRecombination.cpp                | 2 +-
 moses/FF/ControlRecombination.h                  | 2 +-
 moses/FF/CountNonTerms.h                         | 2 +-
 moses/FF/CoveredReferenceFeature.cpp             | 2 +-
 moses/FF/CoveredReferenceFeature.h               | 2 +-
 moses/FF/DecodeFeature.h                         | 2 +-
 moses/FF/DistortionScoreProducer.cpp             | 2 +-
 moses/FF/DistortionScoreProducer.h               | 2 +-
 moses/FF/ExternalFeature.cpp                     | 2 +-
 moses/FF/ExternalFeature.h                       | 2 +-
 moses/FF/GlobalLexicalModel.cpp                  | 2 +-
 moses/FF/GlobalLexicalModel.h                    | 2 +-
 moses/FF/GlobalLexicalModelUnlimited.cpp         | 2 +-
 moses/FF/GlobalLexicalModelUnlimited.h           | 2 +-
 moses/FF/HyperParameterAsWeight.h                | 2 +-
 moses/FF/InputFeature.h                          | 2 +-
 moses/FF/InternalStructStatelessFF.h             | 2 +-
 moses/FF/LexicalReordering/LexicalReordering.cpp | 2 +-
 moses/FF/LexicalReordering/LexicalReordering.h   | 2 +-
 moses/FF/MaxSpanFreeNonTermSource.h              | 2 +-
 moses/FF/NieceTerminal.cpp                       | 2 +-
 moses/FF/NieceTerminal.h                         | 2 +-
 moses/FF/OSM-Feature/OpSequenceModel.cpp         | 2 +-
 moses/FF/OSM-Feature/OpSequenceModel.h           | 2 +-
 moses/FF/PhraseBoundaryFeature.cpp               | 2 +-
 moses/FF/PhraseBoundaryFeature.h                 | 2 +-
 moses/FF/PhraseLengthFeature.h                   | 2 +-
 moses/FF/PhrasePairFeature.cpp                   | 2 +-
 moses/FF/PhrasePairFeature.h                     | 2 +-
 moses/FF/PhrasePenalty.h                         | 2 +-
 moses/FF/ReferenceComparison.h                   | 2 +-
 moses/FF/RuleScope.h                             | 2 +-
 moses/FF/SetSourcePhrase.h                       | 2 +-
 moses/FF/SkeletonStatefulFF.cpp                  | 2 +-
 moses/FF/SkeletonStatefulFF.h                    | 2 +-
 moses/FF/SkeletonStatelessFF.cpp                 | 2 +-
 moses/FF/SkeletonStatelessFF.h                   | 2 +-
 moses/FF/SoftMatchingFeature.h                   | 2 +-
 moses/FF/SourceGHKMTreeInputMatchFeature.h       | 2 +-
 moses/FF/SourceWordDeletionFeature.h             | 2 +-
 moses/FF/SpanLength.h                            | 2 +-
 moses/FF/SparseHieroReorderingFeature.h          | 2 +-
 moses/FF/StatefulFeatureFunction.h               | 2 +-
 moses/FF/StatelessFeatureFunction.h              | 2 +-
 moses/FF/SyntaxRHS.cpp                           | 2 +-
 moses/FF/SyntaxRHS.h                             | 2 +-
 moses/FF/TargetBigramFeature.cpp                 | 2 +-
 moses/FF/TargetBigramFeature.h                   | 2 +-
 moses/FF/TargetNgramFeature.cpp                  | 2 +-
 moses/FF/TargetNgramFeature.h                    | 2 +-
 moses/FF/TargetWordInsertionFeature.h            | 2 +-
 moses/FF/TreeStructureFeature.h                  | 2 +-
 moses/FF/UnknownWordPenaltyProducer.h            | 2 +-
 moses/FF/WordPenaltyProducer.h                   | 2 +-
 moses/FF/WordTranslationFeature.cpp              | 2 +-
 moses/FF/WordTranslationFeature.h                | 2 +-
 moses/Hypothesis.cpp                             | 6 +++---
 moses/LM/DALMWrapper.cpp                         | 2 +-
 moses/LM/DALMWrapper.h                           | 2 +-
 moses/LM/Implementation.cpp                      | 2 +-
 moses/LM/Implementation.h                        | 2 +-
 moses/LM/Ken.cpp                                 | 2 +-
 moses/LM/Ken.h                                   | 2 +-
 moses/ScoreComponentCollectionTest.cpp           | 2 +-
 68 files changed, 70 insertions(+), 70 deletions(-)

diff --git a/moses/FF/BleuScoreFeature.cpp b/moses/FF/BleuScoreFeature.cpp
index 348eaa0eaf..0fb1e257d8 100644
--- a/moses/FF/BleuScoreFeature.cpp
+++ b/moses/FF/BleuScoreFeature.cpp
@@ -502,7 +502,7 @@ void BleuScoreFeature::GetClippedNgramMatchesAndCounts(Phrase& phrase,
  * Given a previous state, compute Bleu score for the updated state with an additional target
  * phrase translated.
  */
-FFState* BleuScoreFeature::Evaluate(const Hypothesis& cur_hypo,
+FFState* BleuScoreFeature::EvaluateWhenApplied(const Hypothesis& cur_hypo,
                                     const FFState* prev_state,
                                     ScoreComponentCollection* accumulator) const
 {
diff --git a/moses/FF/BleuScoreFeature.h b/moses/FF/BleuScoreFeature.h
index cb974da207..c383648f06 100644
--- a/moses/FF/BleuScoreFeature.h
+++ b/moses/FF/BleuScoreFeature.h
@@ -115,7 +115,7 @@ class BleuScoreFeature : public StatefulFeatureFunction
                                        std::vector< size_t >&,
                                        size_t skip = 0) const;
 
-  FFState* Evaluate( const Hypothesis& cur_hypo,
+  FFState* EvaluateWhenApplied( const Hypothesis& cur_hypo,
                      const FFState* prev_state,
                      ScoreComponentCollection* accumulator) const;
   FFState* EvaluateChart(const ChartHypothesis& cur_hypo,
diff --git a/moses/FF/ConstrainedDecoding.cpp b/moses/FF/ConstrainedDecoding.cpp
index 9a8ecd1c3b..e0bc188ad6 100644
--- a/moses/FF/ConstrainedDecoding.cpp
+++ b/moses/FF/ConstrainedDecoding.cpp
@@ -100,7 +100,7 @@ const std::vector<Phrase> *GetConstraint(const std::map<long,std::vector<Phrase>
   }
 }
 
-FFState* ConstrainedDecoding::Evaluate(
+FFState* ConstrainedDecoding::EvaluateWhenApplied(
   const Hypothesis& hypo,
   const FFState* prev_state,
   ScoreComponentCollection* accumulator) const
diff --git a/moses/FF/ConstrainedDecoding.h b/moses/FF/ConstrainedDecoding.h
index 21d8a69c0a..c7eef65223 100644
--- a/moses/FF/ConstrainedDecoding.h
+++ b/moses/FF/ConstrainedDecoding.h
@@ -55,7 +55,7 @@ class ConstrainedDecoding : public StatefulFeatureFunction
                 , ScoreComponentCollection *estimatedFutureScore = NULL) const
   {}
   
-  FFState* Evaluate(
+  FFState* EvaluateWhenApplied(
     const Hypothesis& cur_hypo,
     const FFState* prev_state,
     ScoreComponentCollection* accumulator) const;
diff --git a/moses/FF/ControlRecombination.cpp b/moses/FF/ControlRecombination.cpp
index d3e7c82ab2..adc36145ea 100644
--- a/moses/FF/ControlRecombination.cpp
+++ b/moses/FF/ControlRecombination.cpp
@@ -56,7 +56,7 @@ std::vector<float> ControlRecombination::DefaultWeights() const
   return ret;
 }
 
-FFState* ControlRecombination::Evaluate(
+FFState* ControlRecombination::EvaluateWhenApplied(
   const Hypothesis& hypo,
   const FFState* prev_state,
   ScoreComponentCollection* accumulator) const
diff --git a/moses/FF/ControlRecombination.h b/moses/FF/ControlRecombination.h
index c35714d543..f5b48027b8 100644
--- a/moses/FF/ControlRecombination.h
+++ b/moses/FF/ControlRecombination.h
@@ -69,7 +69,7 @@ class ControlRecombination : public StatefulFeatureFunction
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection *estimatedFutureScore = NULL) const
   {}
-  FFState* Evaluate(
+  FFState* EvaluateWhenApplied(
     const Hypothesis& cur_hypo,
     const FFState* prev_state,
     ScoreComponentCollection* accumulator) const;
diff --git a/moses/FF/CountNonTerms.h b/moses/FF/CountNonTerms.h
index 1b078978c5..0962da5569 100644
--- a/moses/FF/CountNonTerms.h
+++ b/moses/FF/CountNonTerms.h
@@ -25,7 +25,7 @@ class CountNonTerms : public StatelessFeatureFunction
                 , ScoreComponentCollection *estimatedFutureScore = NULL) const
   {}
 
-  void Evaluate(const Hypothesis& hypo,
+  void EvaluateWhenApplied(const Hypothesis& hypo,
                 ScoreComponentCollection* accumulator) const
   {}
 
diff --git a/moses/FF/CoveredReferenceFeature.cpp b/moses/FF/CoveredReferenceFeature.cpp
index 81c19dc4bf..1a43b29888 100644
--- a/moses/FF/CoveredReferenceFeature.cpp
+++ b/moses/FF/CoveredReferenceFeature.cpp
@@ -90,7 +90,7 @@ void CoveredReferenceFeature::SetParameter(const std::string& key, const std::st
   }
 }
 
-FFState* CoveredReferenceFeature::Evaluate(
+FFState* CoveredReferenceFeature::EvaluateWhenApplied(
   const Hypothesis& cur_hypo,
   const FFState* prev_state,
   ScoreComponentCollection* accumulator) const
diff --git a/moses/FF/CoveredReferenceFeature.h b/moses/FF/CoveredReferenceFeature.h
index b1c77d4e31..4d4275f293 100644
--- a/moses/FF/CoveredReferenceFeature.h
+++ b/moses/FF/CoveredReferenceFeature.h
@@ -62,7 +62,7 @@ class CoveredReferenceFeature : public StatefulFeatureFunction
                 , const StackVec *stackVec
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection *estimatedFutureScore = NULL) const;
-  FFState* Evaluate(
+  FFState* EvaluateWhenApplied(
     const Hypothesis& cur_hypo,
     const FFState* prev_state,
     ScoreComponentCollection* accumulator) const;
diff --git a/moses/FF/DecodeFeature.h b/moses/FF/DecodeFeature.h
index 27906fa872..c13af8d710 100644
--- a/moses/FF/DecodeFeature.h
+++ b/moses/FF/DecodeFeature.h
@@ -62,7 +62,7 @@ class DecodeFeature : public StatelessFeatureFunction
   bool IsUseable(const FactorMask &mask) const;
   void SetParameter(const std::string& key, const std::string& value);
 
-  void Evaluate(const Hypothesis& hypo,
+  void EvaluateWhenApplied(const Hypothesis& hypo,
                 ScoreComponentCollection* accumulator) const
   {}
   void EvaluateChart(const ChartHypothesis &hypo,
diff --git a/moses/FF/DistortionScoreProducer.cpp b/moses/FF/DistortionScoreProducer.cpp
index 303f352368..5995fe2130 100644
--- a/moses/FF/DistortionScoreProducer.cpp
+++ b/moses/FF/DistortionScoreProducer.cpp
@@ -87,7 +87,7 @@ float DistortionScoreProducer::CalculateDistortionScore(const Hypothesis& hypo,
 }
 
 
-FFState* DistortionScoreProducer::Evaluate(
+FFState* DistortionScoreProducer::EvaluateWhenApplied(
   const Hypothesis& hypo,
   const FFState* prev_state,
   ScoreComponentCollection* out) const
diff --git a/moses/FF/DistortionScoreProducer.h b/moses/FF/DistortionScoreProducer.h
index bc979d231b..5f90c6e591 100644
--- a/moses/FF/DistortionScoreProducer.h
+++ b/moses/FF/DistortionScoreProducer.h
@@ -28,7 +28,7 @@ class DistortionScoreProducer : public StatefulFeatureFunction
 
   virtual const FFState* EmptyHypothesisState(const InputType &input) const;
 
-  virtual FFState* Evaluate(
+  virtual FFState* EvaluateWhenApplied(
     const Hypothesis& cur_hypo,
     const FFState* prev_state,
     ScoreComponentCollection* accumulator) const;
diff --git a/moses/FF/ExternalFeature.cpp b/moses/FF/ExternalFeature.cpp
index 1415411709..8fe6125441 100644
--- a/moses/FF/ExternalFeature.cpp
+++ b/moses/FF/ExternalFeature.cpp
@@ -51,7 +51,7 @@ void ExternalFeature::SetParameter(const std::string& key, const std::string& va
   }
 }
 
-FFState* ExternalFeature::Evaluate(
+FFState* ExternalFeature::EvaluateWhenApplied(
   const Hypothesis& cur_hypo,
   const FFState* prev_state,
   ScoreComponentCollection* accumulator) const
diff --git a/moses/FF/ExternalFeature.h b/moses/FF/ExternalFeature.h
index b2be498d45..f845842a5c 100644
--- a/moses/FF/ExternalFeature.h
+++ b/moses/FF/ExternalFeature.h
@@ -63,7 +63,7 @@ class ExternalFeature : public StatefulFeatureFunction
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection *estimatedFutureScore = NULL) const
   {}
-  FFState* Evaluate(
+  FFState* EvaluateWhenApplied(
     const Hypothesis& cur_hypo,
     const FFState* prev_state,
     ScoreComponentCollection* accumulator) const;
diff --git a/moses/FF/GlobalLexicalModel.cpp b/moses/FF/GlobalLexicalModel.cpp
index ff9e87bb0b..f6eb165a80 100644
--- a/moses/FF/GlobalLexicalModel.cpp
+++ b/moses/FF/GlobalLexicalModel.cpp
@@ -165,7 +165,7 @@ float GlobalLexicalModel::GetFromCacheOrScorePhrase( const TargetPhrase& targetP
   return score;
 }
 
-void GlobalLexicalModel::Evaluate
+void GlobalLexicalModel::EvaluateWhenApplied
 (const Hypothesis& hypo,
  ScoreComponentCollection* accumulator) const
 {
diff --git a/moses/FF/GlobalLexicalModel.h b/moses/FF/GlobalLexicalModel.h
index 16963117b0..1af2e79e1a 100644
--- a/moses/FF/GlobalLexicalModel.h
+++ b/moses/FF/GlobalLexicalModel.h
@@ -70,7 +70,7 @@ class GlobalLexicalModel : public StatelessFeatureFunction
 
   bool IsUseable(const FactorMask &mask) const;
 
-  void Evaluate(const Hypothesis& hypo,
+  void EvaluateWhenApplied(const Hypothesis& hypo,
                 ScoreComponentCollection* accumulator) const;
 
 
diff --git a/moses/FF/GlobalLexicalModelUnlimited.cpp b/moses/FF/GlobalLexicalModelUnlimited.cpp
index a6883a7e85..c8dbd5883d 100644
--- a/moses/FF/GlobalLexicalModelUnlimited.cpp
+++ b/moses/FF/GlobalLexicalModelUnlimited.cpp
@@ -108,7 +108,7 @@ void GlobalLexicalModelUnlimited::InitializeForInput( Sentence const& in )
   m_local->input = &in;
 }
 
-void GlobalLexicalModelUnlimited::Evaluate(const Hypothesis& cur_hypo, ScoreComponentCollection* accumulator) const
+void GlobalLexicalModelUnlimited::EvaluateWhenApplied(const Hypothesis& cur_hypo, ScoreComponentCollection* accumulator) const
 {
   const Sentence& input = *(m_local->input);
   const TargetPhrase& targetPhrase = cur_hypo.GetCurrTargetPhrase();
diff --git a/moses/FF/GlobalLexicalModelUnlimited.h b/moses/FF/GlobalLexicalModelUnlimited.h
index cc7bd17e96..688cc607cb 100644
--- a/moses/FF/GlobalLexicalModelUnlimited.h
+++ b/moses/FF/GlobalLexicalModelUnlimited.h
@@ -81,7 +81,7 @@ class GlobalLexicalModelUnlimited : public StatelessFeatureFunction
 
   //TODO: This implements the old interface, but cannot be updated because
   //it appears to be stateful
-  void Evaluate(const Hypothesis& cur_hypo,
+  void EvaluateWhenApplied(const Hypothesis& cur_hypo,
                 ScoreComponentCollection* accumulator) const;
 
   void EvaluateChart(const ChartHypothesis& /* cur_hypo */,
diff --git a/moses/FF/HyperParameterAsWeight.h b/moses/FF/HyperParameterAsWeight.h
index e35e610245..4ed181431f 100644
--- a/moses/FF/HyperParameterAsWeight.h
+++ b/moses/FF/HyperParameterAsWeight.h
@@ -31,7 +31,7 @@ class HyperParameterAsWeight : public StatelessFeatureFunction
                         , ScoreComponentCollection *estimatedFutureScore = NULL) const
   {}
 
-  virtual void Evaluate(const Hypothesis& hypo,
+  virtual void EvaluateWhenApplied(const Hypothesis& hypo,
                         ScoreComponentCollection* accumulator) const
   {}
 
diff --git a/moses/FF/InputFeature.h b/moses/FF/InputFeature.h
index 2c83a958c2..7193d90d37 100644
--- a/moses/FF/InputFeature.h
+++ b/moses/FF/InputFeature.h
@@ -54,7 +54,7 @@ class InputFeature : public StatelessFeatureFunction
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection *estimatedFutureScore = NULL) const;
 
-  void Evaluate(const Hypothesis& hypo,
+  void EvaluateWhenApplied(const Hypothesis& hypo,
                 ScoreComponentCollection* accumulator) const
   {}
   void EvaluateChart(const ChartHypothesis &hypo,
diff --git a/moses/FF/InternalStructStatelessFF.h b/moses/FF/InternalStructStatelessFF.h
index 2d1258bd76..821dfb826a 100644
--- a/moses/FF/InternalStructStatelessFF.h
+++ b/moses/FF/InternalStructStatelessFF.h
@@ -27,7 +27,7 @@ class InternalStructStatelessFF : public StatelessFeatureFunction
 	                        , const StackVec *stackVec
 	                        , ScoreComponentCollection &scoreBreakdown
                         , ScoreComponentCollection *estimatedFutureScore = NULL) const;
-	  virtual void Evaluate(const Hypothesis& hypo,
+	  virtual void EvaluateWhenApplied(const Hypothesis& hypo,
 	                        ScoreComponentCollection* accumulator) const
 	  {}
 	  void EvaluateChart(const ChartHypothesis &hypo,
diff --git a/moses/FF/LexicalReordering/LexicalReordering.cpp b/moses/FF/LexicalReordering/LexicalReordering.cpp
index d692336c9b..426a7d91cb 100644
--- a/moses/FF/LexicalReordering/LexicalReordering.cpp
+++ b/moses/FF/LexicalReordering/LexicalReordering.cpp
@@ -69,7 +69,7 @@ Scores LexicalReordering::GetProb(const Phrase& f, const Phrase& e) const
   return m_table->GetScore(f, e, Phrase(ARRAY_SIZE_INCR));
 }
 
-FFState* LexicalReordering::Evaluate(const Hypothesis& hypo,
+FFState* LexicalReordering::EvaluateWhenApplied(const Hypothesis& hypo,
                                      const FFState* prev_state,
                                      ScoreComponentCollection* out) const
 {
diff --git a/moses/FF/LexicalReordering/LexicalReordering.h b/moses/FF/LexicalReordering/LexicalReordering.h
index 1071ebfd68..b6610639ea 100644
--- a/moses/FF/LexicalReordering/LexicalReordering.h
+++ b/moses/FF/LexicalReordering/LexicalReordering.h
@@ -45,7 +45,7 @@ class LexicalReordering : public StatefulFeatureFunction
 
   Scores GetProb(const Phrase& f, const Phrase& e) const;
 
-  virtual FFState* Evaluate(const Hypothesis& cur_hypo,
+  virtual FFState* EvaluateWhenApplied(const Hypothesis& cur_hypo,
                             const FFState* prev_state,
                             ScoreComponentCollection* accumulator) const;
 
diff --git a/moses/FF/MaxSpanFreeNonTermSource.h b/moses/FF/MaxSpanFreeNonTermSource.h
index f0d0e34e65..df5b98417b 100644
--- a/moses/FF/MaxSpanFreeNonTermSource.h
+++ b/moses/FF/MaxSpanFreeNonTermSource.h
@@ -27,7 +27,7 @@ class MaxSpanFreeNonTermSource : public StatelessFeatureFunction
 	                         , ScoreComponentCollection &scoreBreakdown
 	                         , ScoreComponentCollection *estimatedFutureScore = NULL) const;
 
-	  virtual void Evaluate(const Hypothesis& hypo,
+	  virtual void EvaluateWhenApplied(const Hypothesis& hypo,
 	                        ScoreComponentCollection* accumulator) const
 	  {}
 
diff --git a/moses/FF/NieceTerminal.cpp b/moses/FF/NieceTerminal.cpp
index c8b62ea29c..921a95cbe3 100644
--- a/moses/FF/NieceTerminal.cpp
+++ b/moses/FF/NieceTerminal.cpp
@@ -71,7 +71,7 @@ void NieceTerminal::EvaluateWithSourceContext(const InputType &input
 
 }
 
-void NieceTerminal::Evaluate(const Hypothesis& hypo,
+void NieceTerminal::EvaluateWhenApplied(const Hypothesis& hypo,
                                    ScoreComponentCollection* accumulator) const
 {}
 
diff --git a/moses/FF/NieceTerminal.h b/moses/FF/NieceTerminal.h
index 0953be44fb..93e55d6347 100644
--- a/moses/FF/NieceTerminal.h
+++ b/moses/FF/NieceTerminal.h
@@ -29,7 +29,7 @@ class NieceTerminal : public StatelessFeatureFunction
                 , const StackVec *stackVec
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection *estimatedFutureScore = NULL) const;
-  void Evaluate(const Hypothesis& hypo,
+  void EvaluateWhenApplied(const Hypothesis& hypo,
                 ScoreComponentCollection* accumulator) const;
   void EvaluateChart(const ChartHypothesis &hypo,
                      ScoreComponentCollection* accumulator) const;
diff --git a/moses/FF/OSM-Feature/OpSequenceModel.cpp b/moses/FF/OSM-Feature/OpSequenceModel.cpp
index ba5405729c..bc245d988b 100644
--- a/moses/FF/OSM-Feature/OpSequenceModel.cpp
+++ b/moses/FF/OSM-Feature/OpSequenceModel.cpp
@@ -87,7 +87,7 @@ void OpSequenceModel:: EvaluateInIsolation(const Phrase &source
 }
 
 
-FFState* OpSequenceModel::Evaluate(
+FFState* OpSequenceModel::EvaluateWhenApplied(
   const Hypothesis& cur_hypo,
   const FFState* prev_state,
   ScoreComponentCollection* accumulator) const
diff --git a/moses/FF/OSM-Feature/OpSequenceModel.h b/moses/FF/OSM-Feature/OpSequenceModel.h
index 94fa6899de..7dbe2e0ca0 100644
--- a/moses/FF/OSM-Feature/OpSequenceModel.h
+++ b/moses/FF/OSM-Feature/OpSequenceModel.h
@@ -29,7 +29,7 @@ class OpSequenceModel : public StatefulFeatureFunction
   void readLanguageModel(const char *);
   void Load();
 
-  FFState* Evaluate(
+  FFState* EvaluateWhenApplied(
     const Hypothesis& cur_hypo,
     const FFState* prev_state,
     ScoreComponentCollection* accumulator) const;
diff --git a/moses/FF/PhraseBoundaryFeature.cpp b/moses/FF/PhraseBoundaryFeature.cpp
index d82181b76a..3fdcf27f9c 100644
--- a/moses/FF/PhraseBoundaryFeature.cpp
+++ b/moses/FF/PhraseBoundaryFeature.cpp
@@ -66,7 +66,7 @@ void PhraseBoundaryFeature::AddFeatures(
 
 }
 
-FFState* PhraseBoundaryFeature::Evaluate
+FFState* PhraseBoundaryFeature::EvaluateWhenApplied
 (const Hypothesis& cur_hypo, const FFState* prev_state,
  ScoreComponentCollection* scores) const
 {
diff --git a/moses/FF/PhraseBoundaryFeature.h b/moses/FF/PhraseBoundaryFeature.h
index 33bf43cb0b..9aec700dcb 100644
--- a/moses/FF/PhraseBoundaryFeature.h
+++ b/moses/FF/PhraseBoundaryFeature.h
@@ -44,7 +44,7 @@ class PhraseBoundaryFeature : public StatefulFeatureFunction
 
   virtual const FFState* EmptyHypothesisState(const InputType &) const;
 
-  virtual FFState* Evaluate(const Hypothesis& cur_hypo, const FFState* prev_state,
+  virtual FFState* EvaluateWhenApplied(const Hypothesis& cur_hypo, const FFState* prev_state,
                             ScoreComponentCollection* accumulator) const;
 
   virtual FFState* EvaluateChart( const ChartHypothesis& /* cur_hypo */,
diff --git a/moses/FF/PhraseLengthFeature.h b/moses/FF/PhraseLengthFeature.h
index f154a2ef60..cf26d9a20b 100644
--- a/moses/FF/PhraseLengthFeature.h
+++ b/moses/FF/PhraseLengthFeature.h
@@ -24,7 +24,7 @@ class PhraseLengthFeature : public StatelessFeatureFunction
     return true;
   }
 
-  void Evaluate(const Hypothesis& hypo,
+  void EvaluateWhenApplied(const Hypothesis& hypo,
                 ScoreComponentCollection* accumulator) const
   {}
 
diff --git a/moses/FF/PhrasePairFeature.cpp b/moses/FF/PhrasePairFeature.cpp
index 9277e19f27..f359b68f75 100644
--- a/moses/FF/PhrasePairFeature.cpp
+++ b/moses/FF/PhrasePairFeature.cpp
@@ -106,7 +106,7 @@ void PhrasePairFeature::Load()
   }
 }
 
-void PhrasePairFeature::Evaluate(
+void PhrasePairFeature::EvaluateWhenApplied(
   const Hypothesis& hypo,
   ScoreComponentCollection* accumulator) const
 {
diff --git a/moses/FF/PhrasePairFeature.h b/moses/FF/PhrasePairFeature.h
index 8d2a6a659b..94bf35af3e 100644
--- a/moses/FF/PhrasePairFeature.h
+++ b/moses/FF/PhrasePairFeature.h
@@ -37,7 +37,7 @@ class PhrasePairFeature: public StatelessFeatureFunction
 
   bool IsUseable(const FactorMask &mask) const;
 
-  void Evaluate(const Hypothesis& hypo,
+  void EvaluateWhenApplied(const Hypothesis& hypo,
                 ScoreComponentCollection* accumulator) const;
 
   void EvaluateChart(const ChartHypothesis& hypo,
diff --git a/moses/FF/PhrasePenalty.h b/moses/FF/PhrasePenalty.h
index 2002f88a3e..2babc7d676 100644
--- a/moses/FF/PhrasePenalty.h
+++ b/moses/FF/PhrasePenalty.h
@@ -19,7 +19,7 @@ class PhrasePenalty : public StatelessFeatureFunction
                         , ScoreComponentCollection &scoreBreakdown
                         , ScoreComponentCollection &estimatedFutureScore) const;
 
-  void Evaluate(const Hypothesis& hypo,
+  void EvaluateWhenApplied(const Hypothesis& hypo,
                 ScoreComponentCollection* accumulator) const
   {}
   void EvaluateChart(const ChartHypothesis &hypo,
diff --git a/moses/FF/ReferenceComparison.h b/moses/FF/ReferenceComparison.h
index 7235aa5884..571242ce43 100644
--- a/moses/FF/ReferenceComparison.h
+++ b/moses/FF/ReferenceComparison.h
@@ -29,7 +29,7 @@ class ReferenceComparison : public StatelessFeatureFunction
 	                         , ScoreComponentCollection *estimatedFutureScore = NULL) const
 	  {}
 
-	  virtual void Evaluate(const Hypothesis& hypo,
+	  virtual void EvaluateWhenApplied(const Hypothesis& hypo,
 	                        ScoreComponentCollection* accumulator) const
 	  {}
 
diff --git a/moses/FF/RuleScope.h b/moses/FF/RuleScope.h
index 0aa0552856..a051e411ad 100644
--- a/moses/FF/RuleScope.h
+++ b/moses/FF/RuleScope.h
@@ -27,7 +27,7 @@ class RuleScope : public StatelessFeatureFunction
 	                         , ScoreComponentCollection *estimatedFutureScore = NULL) const
 	  {}
 
-	  virtual void Evaluate(const Hypothesis& hypo,
+	  virtual void EvaluateWhenApplied(const Hypothesis& hypo,
 	                        ScoreComponentCollection* accumulator) const
 	  {}
 
diff --git a/moses/FF/SetSourcePhrase.h b/moses/FF/SetSourcePhrase.h
index c78fe14fae..3f5bc82868 100644
--- a/moses/FF/SetSourcePhrase.h
+++ b/moses/FF/SetSourcePhrase.h
@@ -27,7 +27,7 @@ class SetSourcePhrase : public StatelessFeatureFunction
                          , ScoreComponentCollection *estimatedFutureScore = NULL) const
   {}
 
-  virtual void Evaluate(const Hypothesis& hypo,
+  virtual void EvaluateWhenApplied(const Hypothesis& hypo,
                         ScoreComponentCollection* accumulator) const
   {}
 
diff --git a/moses/FF/SkeletonStatefulFF.cpp b/moses/FF/SkeletonStatefulFF.cpp
index 3c7eb249c1..0d1a0f9118 100644
--- a/moses/FF/SkeletonStatefulFF.cpp
+++ b/moses/FF/SkeletonStatefulFF.cpp
@@ -37,7 +37,7 @@ void SkeletonStatefulFF::EvaluateWithSourceContext(const InputType &input
                                   , ScoreComponentCollection *estimatedFutureScore) const
 {}
 
-FFState* SkeletonStatefulFF::Evaluate(
+FFState* SkeletonStatefulFF::EvaluateWhenApplied(
   const Hypothesis& cur_hypo,
   const FFState* prev_state,
   ScoreComponentCollection* accumulator) const
diff --git a/moses/FF/SkeletonStatefulFF.h b/moses/FF/SkeletonStatefulFF.h
index c88381cefc..fd93bce55a 100644
--- a/moses/FF/SkeletonStatefulFF.h
+++ b/moses/FF/SkeletonStatefulFF.h
@@ -40,7 +40,7 @@ class SkeletonStatefulFF : public StatefulFeatureFunction
                 , const StackVec *stackVec
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection *estimatedFutureScore = NULL) const;
-  FFState* Evaluate(
+  FFState* EvaluateWhenApplied(
     const Hypothesis& cur_hypo,
     const FFState* prev_state,
     ScoreComponentCollection* accumulator) const;
diff --git a/moses/FF/SkeletonStatelessFF.cpp b/moses/FF/SkeletonStatelessFF.cpp
index 6c6193372a..446d57b932 100644
--- a/moses/FF/SkeletonStatelessFF.cpp
+++ b/moses/FF/SkeletonStatelessFF.cpp
@@ -44,7 +44,7 @@ void SkeletonStatelessFF::EvaluateWithSourceContext(const InputType &input
 
 }
 
-void SkeletonStatelessFF::Evaluate(const Hypothesis& hypo,
+void SkeletonStatelessFF::EvaluateWhenApplied(const Hypothesis& hypo,
                                    ScoreComponentCollection* accumulator) const
 {}
 
diff --git a/moses/FF/SkeletonStatelessFF.h b/moses/FF/SkeletonStatelessFF.h
index 4b88fd2136..5d772b398c 100644
--- a/moses/FF/SkeletonStatelessFF.h
+++ b/moses/FF/SkeletonStatelessFF.h
@@ -25,7 +25,7 @@ class SkeletonStatelessFF : public StatelessFeatureFunction
                 , const StackVec *stackVec
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection *estimatedFutureScore = NULL) const;
-  void Evaluate(const Hypothesis& hypo,
+  void EvaluateWhenApplied(const Hypothesis& hypo,
                 ScoreComponentCollection* accumulator) const;
   void EvaluateChart(const ChartHypothesis &hypo,
                      ScoreComponentCollection* accumulator) const;
diff --git a/moses/FF/SoftMatchingFeature.h b/moses/FF/SoftMatchingFeature.h
index 110fc87bb7..37568325f1 100644
--- a/moses/FF/SoftMatchingFeature.h
+++ b/moses/FF/SoftMatchingFeature.h
@@ -32,7 +32,7 @@ class SoftMatchingFeature : public StatelessFeatureFunction
                 , const StackVec *stackVec
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection *estimatedFutureScore = NULL) const {};
-  void Evaluate(const Hypothesis& hypo,
+  void EvaluateWhenApplied(const Hypothesis& hypo,
                 ScoreComponentCollection* accumulator) const {};
 
   bool Load(const std::string &filePath);
diff --git a/moses/FF/SourceGHKMTreeInputMatchFeature.h b/moses/FF/SourceGHKMTreeInputMatchFeature.h
index 80ce6af90e..ef9155f489 100644
--- a/moses/FF/SourceGHKMTreeInputMatchFeature.h
+++ b/moses/FF/SourceGHKMTreeInputMatchFeature.h
@@ -29,7 +29,7 @@ class SourceGHKMTreeInputMatchFeature : public StatelessFeatureFunction
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection *estimatedFutureScore = NULL) const;
 
-  void Evaluate(const Hypothesis& hypo,
+  void EvaluateWhenApplied(const Hypothesis& hypo,
                 ScoreComponentCollection* accumulator) const {};
 
   void EvaluateChart(const ChartHypothesis &hypo,
diff --git a/moses/FF/SourceWordDeletionFeature.h b/moses/FF/SourceWordDeletionFeature.h
index cba5ec0043..a2fec0f85c 100644
--- a/moses/FF/SourceWordDeletionFeature.h
+++ b/moses/FF/SourceWordDeletionFeature.h
@@ -39,7 +39,7 @@ class SourceWordDeletionFeature : public StatelessFeatureFunction
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection *estimatedFutureScore = NULL) const
   {}
-  void Evaluate(const Hypothesis& hypo,
+  void EvaluateWhenApplied(const Hypothesis& hypo,
                 ScoreComponentCollection* accumulator) const
   {}
   void EvaluateChart(const ChartHypothesis &hypo,
diff --git a/moses/FF/SpanLength.h b/moses/FF/SpanLength.h
index 7b3726a0eb..7792cc6d09 100644
--- a/moses/FF/SpanLength.h
+++ b/moses/FF/SpanLength.h
@@ -27,7 +27,7 @@ class SpanLength : public StatelessFeatureFunction
 	                         , ScoreComponentCollection *estimatedFutureScore = NULL) const;
 
 
-	  virtual void Evaluate(const Hypothesis& hypo,
+	  virtual void EvaluateWhenApplied(const Hypothesis& hypo,
 	                        ScoreComponentCollection* accumulator) const
 	  {}
 
diff --git a/moses/FF/SparseHieroReorderingFeature.h b/moses/FF/SparseHieroReorderingFeature.h
index b0f20d7f78..45ff1884a2 100644
--- a/moses/FF/SparseHieroReorderingFeature.h
+++ b/moses/FF/SparseHieroReorderingFeature.h
@@ -44,7 +44,7 @@ class SparseHieroReorderingFeature : public StatelessFeatureFunction
                         , ScoreComponentCollection *estimatedFutureScore = NULL)  const
 	{}
 
-  virtual void Evaluate(const Hypothesis& hypo,
+  virtual void EvaluateWhenApplied(const Hypothesis& hypo,
                         ScoreComponentCollection* accumulator) const
   {}
   void EvaluateChart(const ChartHypothesis &hypo,
diff --git a/moses/FF/StatefulFeatureFunction.h b/moses/FF/StatefulFeatureFunction.h
index 75b46d827f..bf47ec5cf0 100644
--- a/moses/FF/StatefulFeatureFunction.h
+++ b/moses/FF/StatefulFeatureFunction.h
@@ -29,7 +29,7 @@ class StatefulFeatureFunction: public FeatureFunction
    * hypothesis, you should store it in an FFState object which will be passed
    * in as prev_state.  If you don't do this, you will get in trouble.
    */
-  virtual FFState* Evaluate(
+  virtual FFState* EvaluateWhenApplied(
     const Hypothesis& cur_hypo,
     const FFState* prev_state,
     ScoreComponentCollection* accumulator) const = 0;
diff --git a/moses/FF/StatelessFeatureFunction.h b/moses/FF/StatelessFeatureFunction.h
index fde740115b..e300ac8d32 100644
--- a/moses/FF/StatelessFeatureFunction.h
+++ b/moses/FF/StatelessFeatureFunction.h
@@ -23,7 +23,7 @@ class StatelessFeatureFunction: public FeatureFunction
   /**
     * This should be implemented for features that apply to phrase-based models.
     **/
-  virtual void Evaluate(const Hypothesis& hypo,
+  virtual void EvaluateWhenApplied(const Hypothesis& hypo,
                         ScoreComponentCollection* accumulator) const = 0;
 
   /**
diff --git a/moses/FF/SyntaxRHS.cpp b/moses/FF/SyntaxRHS.cpp
index e168ff4dde..de886cd0cb 100644
--- a/moses/FF/SyntaxRHS.cpp
+++ b/moses/FF/SyntaxRHS.cpp
@@ -42,7 +42,7 @@ void SyntaxRHS::EvaluateWithSourceContext(const InputType &input
 
 }
 
-void SyntaxRHS::Evaluate(const Hypothesis& hypo,
+void SyntaxRHS::EvaluateWhenApplied(const Hypothesis& hypo,
                                    ScoreComponentCollection* accumulator) const
 {}
 
diff --git a/moses/FF/SyntaxRHS.h b/moses/FF/SyntaxRHS.h
index fedeac7246..ed455220bd 100644
--- a/moses/FF/SyntaxRHS.h
+++ b/moses/FF/SyntaxRHS.h
@@ -25,7 +25,7 @@ class SyntaxRHS : public StatelessFeatureFunction
                 , const StackVec *stackVec
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection *estimatedFutureScore = NULL) const;
-  void Evaluate(const Hypothesis& hypo,
+  void EvaluateWhenApplied(const Hypothesis& hypo,
                 ScoreComponentCollection* accumulator) const;
   void EvaluateChart(const ChartHypothesis &hypo,
                      ScoreComponentCollection* accumulator) const;
diff --git a/moses/FF/TargetBigramFeature.cpp b/moses/FF/TargetBigramFeature.cpp
index 104f986e74..f1da62b7dd 100644
--- a/moses/FF/TargetBigramFeature.cpp
+++ b/moses/FF/TargetBigramFeature.cpp
@@ -64,7 +64,7 @@ const FFState* TargetBigramFeature::EmptyHypothesisState(const InputType &/*inpu
   return new TargetBigramState(m_bos);
 }
 
-FFState* TargetBigramFeature::Evaluate(const Hypothesis& cur_hypo,
+FFState* TargetBigramFeature::EvaluateWhenApplied(const Hypothesis& cur_hypo,
                                        const FFState* prev_state,
                                        ScoreComponentCollection* accumulator) const
 {
diff --git a/moses/FF/TargetBigramFeature.h b/moses/FF/TargetBigramFeature.h
index 6d4170a8a3..6b26bb2695 100644
--- a/moses/FF/TargetBigramFeature.h
+++ b/moses/FF/TargetBigramFeature.h
@@ -39,7 +39,7 @@ class TargetBigramFeature : public StatefulFeatureFunction
 
   virtual const FFState* EmptyHypothesisState(const InputType &input) const;
 
-  virtual FFState* Evaluate(const Hypothesis& cur_hypo, const FFState* prev_state,
+  virtual FFState* EvaluateWhenApplied(const Hypothesis& cur_hypo, const FFState* prev_state,
                             ScoreComponentCollection* accumulator) const;
 
   virtual FFState* EvaluateChart( const ChartHypothesis& /* cur_hypo */,
diff --git a/moses/FF/TargetNgramFeature.cpp b/moses/FF/TargetNgramFeature.cpp
index b0abb07a17..a2fc4e0400 100644
--- a/moses/FF/TargetNgramFeature.cpp
+++ b/moses/FF/TargetNgramFeature.cpp
@@ -95,7 +95,7 @@ const FFState* TargetNgramFeature::EmptyHypothesisState(const InputType &/*input
   return new TargetNgramState(bos);
 }
 
-FFState* TargetNgramFeature::Evaluate(const Hypothesis& cur_hypo,
+FFState* TargetNgramFeature::EvaluateWhenApplied(const Hypothesis& cur_hypo,
                                       const FFState* prev_state,
                                       ScoreComponentCollection* accumulator) const
 {
diff --git a/moses/FF/TargetNgramFeature.h b/moses/FF/TargetNgramFeature.h
index a1da40d32c..914538dd4b 100644
--- a/moses/FF/TargetNgramFeature.h
+++ b/moses/FF/TargetNgramFeature.h
@@ -186,7 +186,7 @@ class TargetNgramFeature : public StatefulFeatureFunction
 
   virtual const FFState* EmptyHypothesisState(const InputType &input) const;
 
-  virtual FFState* Evaluate(const Hypothesis& cur_hypo, const FFState* prev_state,
+  virtual FFState* EvaluateWhenApplied(const Hypothesis& cur_hypo, const FFState* prev_state,
                             ScoreComponentCollection* accumulator) const;
 
   virtual FFState* EvaluateChart(const ChartHypothesis& cur_hypo, int featureId,
diff --git a/moses/FF/TargetWordInsertionFeature.h b/moses/FF/TargetWordInsertionFeature.h
index 3a9230c9da..6d48e7a982 100644
--- a/moses/FF/TargetWordInsertionFeature.h
+++ b/moses/FF/TargetWordInsertionFeature.h
@@ -39,7 +39,7 @@ class TargetWordInsertionFeature : public StatelessFeatureFunction
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection *estimatedFutureScore = NULL) const
   {}
-  void Evaluate(const Hypothesis& hypo,
+  void EvaluateWhenApplied(const Hypothesis& hypo,
                 ScoreComponentCollection* accumulator) const
   {}
   void EvaluateChart(const ChartHypothesis &hypo,
diff --git a/moses/FF/TreeStructureFeature.h b/moses/FF/TreeStructureFeature.h
index fa915f9164..100e378c65 100644
--- a/moses/FF/TreeStructureFeature.h
+++ b/moses/FF/TreeStructureFeature.h
@@ -162,7 +162,7 @@ class TreeStructureFeature : public StatefulFeatureFunction
                 , const StackVec *stackVec
                 , ScoreComponentCollection &scoreBreakdown
                 , ScoreComponentCollection *estimatedFutureScore = NULL) const {};
-  FFState* Evaluate(
+  FFState* EvaluateWhenApplied(
     const Hypothesis& cur_hypo,
     const FFState* prev_state,
     ScoreComponentCollection* accumulator) const {UTIL_THROW(util::Exception, "Not implemented");};
diff --git a/moses/FF/UnknownWordPenaltyProducer.h b/moses/FF/UnknownWordPenaltyProducer.h
index 5a741db570..d1ac80a026 100644
--- a/moses/FF/UnknownWordPenaltyProducer.h
+++ b/moses/FF/UnknownWordPenaltyProducer.h
@@ -31,7 +31,7 @@ class UnknownWordPenaltyProducer : public StatelessFeatureFunction
   }
   std::vector<float> DefaultWeights() const;
 
-  void Evaluate(const Hypothesis& hypo,
+  void EvaluateWhenApplied(const Hypothesis& hypo,
                 ScoreComponentCollection* accumulator) const
   {}
   void EvaluateChart(const ChartHypothesis &hypo,
diff --git a/moses/FF/WordPenaltyProducer.h b/moses/FF/WordPenaltyProducer.h
index 594610344d..5dc07c679c 100644
--- a/moses/FF/WordPenaltyProducer.h
+++ b/moses/FF/WordPenaltyProducer.h
@@ -31,7 +31,7 @@ class WordPenaltyProducer : public StatelessFeatureFunction
                         , const TargetPhrase &targetPhrase
                         , ScoreComponentCollection &scoreBreakdown
                         , ScoreComponentCollection &estimatedFutureScore) const;
-  void Evaluate(const Hypothesis& hypo,
+  void EvaluateWhenApplied(const Hypothesis& hypo,
                 ScoreComponentCollection* accumulator) const
   {}
   void EvaluateChart(const ChartHypothesis &hypo,
diff --git a/moses/FF/WordTranslationFeature.cpp b/moses/FF/WordTranslationFeature.cpp
index 554107c326..ed88c0e7b2 100644
--- a/moses/FF/WordTranslationFeature.cpp
+++ b/moses/FF/WordTranslationFeature.cpp
@@ -137,7 +137,7 @@ void WordTranslationFeature::Load()
   }
 }
 
-void WordTranslationFeature::Evaluate
+void WordTranslationFeature::EvaluateWhenApplied
 (const Hypothesis& hypo,
  ScoreComponentCollection* accumulator) const
 {
diff --git a/moses/FF/WordTranslationFeature.h b/moses/FF/WordTranslationFeature.h
index 91dce3130b..9de73eaef8 100644
--- a/moses/FF/WordTranslationFeature.h
+++ b/moses/FF/WordTranslationFeature.h
@@ -48,7 +48,7 @@ class WordTranslationFeature : public StatelessFeatureFunction
     return new DummyState();
   }
 
-  void Evaluate(const Hypothesis& hypo,
+  void EvaluateWhenApplied(const Hypothesis& hypo,
                 ScoreComponentCollection* accumulator) const;
 
   void EvaluateChart(const ChartHypothesis &hypo,
diff --git a/moses/Hypothesis.cpp b/moses/Hypothesis.cpp
index 400fd0e0f7..61e7c3f714 100644
--- a/moses/Hypothesis.cpp
+++ b/moses/Hypothesis.cpp
@@ -211,7 +211,7 @@ void Hypothesis::EvaluateWith(const StatefulFeatureFunction &sfff,
 {
   const StaticData &staticData = StaticData::Instance();
   if (! staticData.IsFeatureFunctionIgnored( sfff )) {
-    m_ffStates[state_idx] = sfff.Evaluate(
+    m_ffStates[state_idx] = sfff.EvaluateWhenApplied(
                               *this,
                               m_prevHypo ? m_prevHypo->m_ffStates[state_idx] : NULL,
                               &m_scoreBreakdown);
@@ -222,7 +222,7 @@ void Hypothesis::EvaluateWith(const StatelessFeatureFunction& slff)
 {
   const StaticData &staticData = StaticData::Instance();
   if (! staticData.IsFeatureFunctionIgnored( slff )) {
-    slff.Evaluate(*this, &m_scoreBreakdown);
+    slff.EvaluateWhenApplied(*this, &m_scoreBreakdown);
   }
 }
 
@@ -254,7 +254,7 @@ void Hypothesis::Evaluate(const SquareMatrix &futureScore)
     const StatefulFeatureFunction &ff = *ffs[i];
     const StaticData &staticData = StaticData::Instance();
     if (! staticData.IsFeatureFunctionIgnored(ff)) {
-      m_ffStates[i] = ff.Evaluate(*this,
+      m_ffStates[i] = ff.EvaluateWhenApplied(*this,
                                   m_prevHypo ? m_prevHypo->m_ffStates[i] : NULL,
                                   &m_scoreBreakdown);
     }
diff --git a/moses/LM/DALMWrapper.cpp b/moses/LM/DALMWrapper.cpp
index 420efd9e8e..943b4f3af5 100644
--- a/moses/LM/DALMWrapper.cpp
+++ b/moses/LM/DALMWrapper.cpp
@@ -288,7 +288,7 @@ void LanguageModelDALM::CalcScore(const Phrase &phrase, float &fullScore, float
 	ngramScore = TransformLMScore(ngramScore);
 }
 
-FFState *LanguageModelDALM::Evaluate(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const{
+FFState *LanguageModelDALM::EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const{
   // In this function, we only compute the LM scores of n-grams that overlap a
   // phrase boundary. Phrase-internal scores are taken directly from the
   // translation option.
diff --git a/moses/LM/DALMWrapper.h b/moses/LM/DALMWrapper.h
index c791eeea66..ae3618cf9d 100644
--- a/moses/LM/DALMWrapper.h
+++ b/moses/LM/DALMWrapper.h
@@ -34,7 +34,7 @@ class LanguageModelDALM : public LanguageModel
 
   virtual void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const;
 
-  virtual FFState *Evaluate(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const;
+  virtual FFState *EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const;
 
   virtual FFState *EvaluateChart(const ChartHypothesis& hypo, int featureID, ScoreComponentCollection *out) const;
 
diff --git a/moses/LM/Implementation.cpp b/moses/LM/Implementation.cpp
index ef09fbc77f..35082c2b15 100644
--- a/moses/LM/Implementation.cpp
+++ b/moses/LM/Implementation.cpp
@@ -134,7 +134,7 @@ void LanguageModelImplementation::CalcScore(const Phrase &phrase, float &fullSco
   }
 }
 
-FFState *LanguageModelImplementation::Evaluate(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const
+FFState *LanguageModelImplementation::EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const
 {
   // In this function, we only compute the LM scores of n-grams that overlap a
   // phrase boundary. Phrase-internal scores are taken directly from the
diff --git a/moses/LM/Implementation.h b/moses/LM/Implementation.h
index a39f5e42b6..6c8ee50a7b 100644
--- a/moses/LM/Implementation.h
+++ b/moses/LM/Implementation.h
@@ -89,7 +89,7 @@ class LanguageModelImplementation : public LanguageModel
 
   void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const;
 
-  FFState *Evaluate(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const;
+  FFState *EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const;
 
   FFState* EvaluateChart(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection* accumulator) const;
 
diff --git a/moses/LM/Ken.cpp b/moses/LM/Ken.cpp
index 2dfb58c239..2674dbe8c7 100644
--- a/moses/LM/Ken.cpp
+++ b/moses/LM/Ken.cpp
@@ -229,7 +229,7 @@ template <class Model> void LanguageModelKen<Model>::CalcScore(const Phrase &phr
   fullScore = TransformLMScore(fullScore);
 }
 
-template <class Model> FFState *LanguageModelKen<Model>::Evaluate(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const
+template <class Model> FFState *LanguageModelKen<Model>::EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const
 {
   const lm::ngram::State &in_state = static_cast<const KenLMState&>(*ps).state;
 
diff --git a/moses/LM/Ken.h b/moses/LM/Ken.h
index e5950f5913..931ba24129 100644
--- a/moses/LM/Ken.h
+++ b/moses/LM/Ken.h
@@ -55,7 +55,7 @@ template <class Model> class LanguageModelKen : public LanguageModel
 
   virtual void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const;
 
-  virtual FFState *Evaluate(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const;
+  virtual FFState *EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const;
 
   virtual FFState *EvaluateChart(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection *accumulator) const;
 
diff --git a/moses/ScoreComponentCollectionTest.cpp b/moses/ScoreComponentCollectionTest.cpp
index 719e05e7d3..d1064c6aea 100644
--- a/moses/ScoreComponentCollectionTest.cpp
+++ b/moses/ScoreComponentCollectionTest.cpp
@@ -34,7 +34,7 @@ class MockStatelessFeatureFunction : public StatelessFeatureFunction
 public:
   MockStatelessFeatureFunction(size_t n, const string &line) :
     StatelessFeatureFunction(n, line) {}
-  void Evaluate(const Hypothesis&, ScoreComponentCollection*) const {}
+  void EvaluateWhenApplied(const Hypothesis&, ScoreComponentCollection*) const {}
   void EvaluateChart(const ChartHypothesis&, ScoreComponentCollection*) const {}
   void EvaluateWithSourceContext(const InputType &input
                 , const InputPath &inputPath

From e197b110fcc9c1a708da1fd88ec7f79492e74ff4 Mon Sep 17 00:00:00 2001
From: Hieu Hoang <hieuhoang@gmail.com>
Date: Wed, 9 Jul 2014 23:54:16 +0100
Subject: [PATCH 39/84] rename EvaluateChart() to EvaluateWhenApplied()

---
 moses/ChartHypothesis.cpp                      | 4 ++--
 moses/FF/BleuScoreFeature.cpp                  | 2 +-
 moses/FF/BleuScoreFeature.h                    | 2 +-
 moses/FF/ConstrainedDecoding.cpp               | 2 +-
 moses/FF/ConstrainedDecoding.h                 | 2 +-
 moses/FF/ControlRecombination.cpp              | 2 +-
 moses/FF/ControlRecombination.h                | 2 +-
 moses/FF/CountNonTerms.h                       | 2 +-
 moses/FF/CoveredReferenceFeature.cpp           | 2 +-
 moses/FF/CoveredReferenceFeature.h             | 2 +-
 moses/FF/DecodeFeature.h                       | 2 +-
 moses/FF/DistortionScoreProducer.h             | 2 +-
 moses/FF/ExternalFeature.cpp                   | 2 +-
 moses/FF/ExternalFeature.h                     | 2 +-
 moses/FF/GlobalLexicalModel.h                  | 2 +-
 moses/FF/GlobalLexicalModelUnlimited.h         | 2 +-
 moses/FF/HyperParameterAsWeight.h              | 2 +-
 moses/FF/InputFeature.h                        | 2 +-
 moses/FF/InternalStructStatelessFF.h           | 2 +-
 moses/FF/LexicalReordering/LexicalReordering.h | 2 +-
 moses/FF/MaxSpanFreeNonTermSource.h            | 2 +-
 moses/FF/NieceTerminal.cpp                     | 2 +-
 moses/FF/NieceTerminal.h                       | 2 +-
 moses/FF/OSM-Feature/OpSequenceModel.cpp       | 2 +-
 moses/FF/OSM-Feature/OpSequenceModel.h         | 2 +-
 moses/FF/PhraseBoundaryFeature.h               | 2 +-
 moses/FF/PhraseLengthFeature.h                 | 2 +-
 moses/FF/PhrasePairFeature.h                   | 2 +-
 moses/FF/PhrasePenalty.h                       | 2 +-
 moses/FF/ReferenceComparison.h                 | 2 +-
 moses/FF/RuleScope.h                           | 2 +-
 moses/FF/SetSourcePhrase.h                     | 2 +-
 moses/FF/SkeletonStatefulFF.cpp                | 2 +-
 moses/FF/SkeletonStatefulFF.h                  | 2 +-
 moses/FF/SkeletonStatelessFF.cpp               | 2 +-
 moses/FF/SkeletonStatelessFF.h                 | 2 +-
 moses/FF/SoftMatchingFeature.cpp               | 2 +-
 moses/FF/SoftMatchingFeature.h                 | 2 +-
 moses/FF/SourceGHKMTreeInputMatchFeature.h     | 2 +-
 moses/FF/SourceWordDeletionFeature.h           | 2 +-
 moses/FF/SpanLength.h                          | 2 +-
 moses/FF/SparseHieroReorderingFeature.cpp      | 2 +-
 moses/FF/SparseHieroReorderingFeature.h        | 2 +-
 moses/FF/StatefulFeatureFunction.h             | 2 +-
 moses/FF/StatelessFeatureFunction.h            | 2 +-
 moses/FF/SyntaxRHS.cpp                         | 2 +-
 moses/FF/SyntaxRHS.h                           | 2 +-
 moses/FF/TargetBigramFeature.h                 | 2 +-
 moses/FF/TargetNgramFeature.cpp                | 2 +-
 moses/FF/TargetNgramFeature.h                  | 2 +-
 moses/FF/TargetWordInsertionFeature.h          | 2 +-
 moses/FF/TreeStructureFeature.cpp              | 2 +-
 moses/FF/TreeStructureFeature.h                | 2 +-
 moses/FF/UnknownWordPenaltyProducer.h          | 2 +-
 moses/FF/WordPenaltyProducer.h                 | 2 +-
 moses/FF/WordTranslationFeature.cpp            | 2 +-
 moses/FF/WordTranslationFeature.h              | 2 +-
 moses/Incremental.cpp                          | 2 +-
 moses/LM/DALMWrapper.cpp                       | 2 +-
 moses/LM/DALMWrapper.h                         | 2 +-
 moses/LM/Implementation.cpp                    | 2 +-
 moses/LM/Implementation.h                      | 2 +-
 moses/LM/Ken.cpp                               | 4 ++--
 moses/LM/Ken.h                                 | 2 +-
 moses/LM/LDHT.cpp                              | 4 ++--
 moses/ScoreComponentCollectionTest.cpp         | 2 +-
 moses/SyntacticLanguageModel.h                 | 2 +-
 67 files changed, 70 insertions(+), 70 deletions(-)

diff --git a/moses/ChartHypothesis.cpp b/moses/ChartHypothesis.cpp
index 212a28d237..2bcc480e77 100644
--- a/moses/ChartHypothesis.cpp
+++ b/moses/ChartHypothesis.cpp
@@ -200,7 +200,7 @@ void ChartHypothesis::Evaluate()
     StatelessFeatureFunction::GetStatelessFeatureFunctions();
   for (unsigned i = 0; i < sfs.size(); ++i) {
     if (! staticData.IsFeatureFunctionIgnored( *sfs[i] )) {
-      sfs[i]->EvaluateChart(*this,&m_scoreBreakdown);
+      sfs[i]->EvaluateWhenApplied(*this,&m_scoreBreakdown);
     }
   }
 
@@ -208,7 +208,7 @@ void ChartHypothesis::Evaluate()
     StatefulFeatureFunction::GetStatefulFeatureFunctions();
   for (unsigned i = 0; i < ffs.size(); ++i) {
     if (! staticData.IsFeatureFunctionIgnored( *ffs[i] )) {
-      m_ffStates[i] = ffs[i]->EvaluateChart(*this,i,&m_scoreBreakdown);
+      m_ffStates[i] = ffs[i]->EvaluateWhenApplied(*this,i,&m_scoreBreakdown);
     }
   }
 
diff --git a/moses/FF/BleuScoreFeature.cpp b/moses/FF/BleuScoreFeature.cpp
index 0fb1e257d8..0d0a20797a 100644
--- a/moses/FF/BleuScoreFeature.cpp
+++ b/moses/FF/BleuScoreFeature.cpp
@@ -563,7 +563,7 @@ FFState* BleuScoreFeature::EvaluateWhenApplied(const Hypothesis& cur_hypo,
   return new_state;
 }
 
-FFState* BleuScoreFeature::EvaluateChart(const ChartHypothesis& cur_hypo, int featureID,
+FFState* BleuScoreFeature::EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureID,
     ScoreComponentCollection* accumulator ) const
 {
   if (!m_enabled) return new BleuScoreState();
diff --git a/moses/FF/BleuScoreFeature.h b/moses/FF/BleuScoreFeature.h
index c383648f06..cdba578acb 100644
--- a/moses/FF/BleuScoreFeature.h
+++ b/moses/FF/BleuScoreFeature.h
@@ -118,7 +118,7 @@ class BleuScoreFeature : public StatefulFeatureFunction
   FFState* EvaluateWhenApplied( const Hypothesis& cur_hypo,
                      const FFState* prev_state,
                      ScoreComponentCollection* accumulator) const;
-  FFState* EvaluateChart(const ChartHypothesis& cur_hypo,
+  FFState* EvaluateWhenApplied(const ChartHypothesis& cur_hypo,
                          int featureID,
                          ScoreComponentCollection* accumulator) const;
   void EvaluateWithSourceContext(const InputType &input
diff --git a/moses/FF/ConstrainedDecoding.cpp b/moses/FF/ConstrainedDecoding.cpp
index e0bc188ad6..bfe4129135 100644
--- a/moses/FF/ConstrainedDecoding.cpp
+++ b/moses/FF/ConstrainedDecoding.cpp
@@ -143,7 +143,7 @@ FFState* ConstrainedDecoding::EvaluateWhenApplied(
   return ret;
 }
 
-FFState* ConstrainedDecoding::EvaluateChart(
+FFState* ConstrainedDecoding::EvaluateWhenApplied(
   const ChartHypothesis &hypo,
   int /* featureID - used to index the state in the previous hypotheses */,
   ScoreComponentCollection* accumulator) const
diff --git a/moses/FF/ConstrainedDecoding.h b/moses/FF/ConstrainedDecoding.h
index c7eef65223..ca007f21d4 100644
--- a/moses/FF/ConstrainedDecoding.h
+++ b/moses/FF/ConstrainedDecoding.h
@@ -60,7 +60,7 @@ class ConstrainedDecoding : public StatefulFeatureFunction
     const FFState* prev_state,
     ScoreComponentCollection* accumulator) const;
 
-  FFState* EvaluateChart(
+  FFState* EvaluateWhenApplied(
     const ChartHypothesis& /* cur_hypo */,
     int /* featureID - used to index the state in the previous hypotheses */,
     ScoreComponentCollection* accumulator) const;
diff --git a/moses/FF/ControlRecombination.cpp b/moses/FF/ControlRecombination.cpp
index adc36145ea..85e88ac943 100644
--- a/moses/FF/ControlRecombination.cpp
+++ b/moses/FF/ControlRecombination.cpp
@@ -64,7 +64,7 @@ FFState* ControlRecombination::EvaluateWhenApplied(
   return new ControlRecombinationState(hypo, *this);
 }
 
-FFState* ControlRecombination::EvaluateChart(
+FFState* ControlRecombination::EvaluateWhenApplied(
   const ChartHypothesis &hypo,
   int /* featureID - used to index the state in the previous hypotheses */,
   ScoreComponentCollection* accumulator) const
diff --git a/moses/FF/ControlRecombination.h b/moses/FF/ControlRecombination.h
index f5b48027b8..095cc6b298 100644
--- a/moses/FF/ControlRecombination.h
+++ b/moses/FF/ControlRecombination.h
@@ -74,7 +74,7 @@ class ControlRecombination : public StatefulFeatureFunction
     const FFState* prev_state,
     ScoreComponentCollection* accumulator) const;
 
-  FFState* EvaluateChart(
+  FFState* EvaluateWhenApplied(
     const ChartHypothesis& /* cur_hypo */,
     int /* featureID - used to index the state in the previous hypotheses */,
     ScoreComponentCollection* accumulator) const;
diff --git a/moses/FF/CountNonTerms.h b/moses/FF/CountNonTerms.h
index 0962da5569..c4e1467e9e 100644
--- a/moses/FF/CountNonTerms.h
+++ b/moses/FF/CountNonTerms.h
@@ -29,7 +29,7 @@ class CountNonTerms : public StatelessFeatureFunction
                 ScoreComponentCollection* accumulator) const
   {}
 
-  void EvaluateChart(
+  void EvaluateWhenApplied(
     const ChartHypothesis& hypo,
     ScoreComponentCollection* accumulator) const
   {}
diff --git a/moses/FF/CoveredReferenceFeature.cpp b/moses/FF/CoveredReferenceFeature.cpp
index 1a43b29888..3a2482d0df 100644
--- a/moses/FF/CoveredReferenceFeature.cpp
+++ b/moses/FF/CoveredReferenceFeature.cpp
@@ -131,7 +131,7 @@ FFState* CoveredReferenceFeature::EvaluateWhenApplied(
   return ret;
 }
 
-FFState* CoveredReferenceFeature::EvaluateChart(
+FFState* CoveredReferenceFeature::EvaluateWhenApplied(
   const ChartHypothesis& /* cur_hypo */,
   int /* featureID - used to index the state in the previous hypotheses */,
   ScoreComponentCollection* accumulator) const
diff --git a/moses/FF/CoveredReferenceFeature.h b/moses/FF/CoveredReferenceFeature.h
index 4d4275f293..a6cdd6f998 100644
--- a/moses/FF/CoveredReferenceFeature.h
+++ b/moses/FF/CoveredReferenceFeature.h
@@ -66,7 +66,7 @@ class CoveredReferenceFeature : public StatefulFeatureFunction
     const Hypothesis& cur_hypo,
     const FFState* prev_state,
     ScoreComponentCollection* accumulator) const;
-  FFState* EvaluateChart(
+  FFState* EvaluateWhenApplied(
     const ChartHypothesis& /* cur_hypo */,
     int /* featureID - used to index the state in the previous hypotheses */,
     ScoreComponentCollection* accumulator) const;
diff --git a/moses/FF/DecodeFeature.h b/moses/FF/DecodeFeature.h
index c13af8d710..ac4e9392b6 100644
--- a/moses/FF/DecodeFeature.h
+++ b/moses/FF/DecodeFeature.h
@@ -65,7 +65,7 @@ class DecodeFeature : public StatelessFeatureFunction
   void EvaluateWhenApplied(const Hypothesis& hypo,
                 ScoreComponentCollection* accumulator) const
   {}
-  void EvaluateChart(const ChartHypothesis &hypo,
+  void EvaluateWhenApplied(const ChartHypothesis &hypo,
                      ScoreComponentCollection* accumulator) const
   {}
   void EvaluateWithSourceContext(const InputType &input
diff --git a/moses/FF/DistortionScoreProducer.h b/moses/FF/DistortionScoreProducer.h
index 5f90c6e591..aa2c18b95d 100644
--- a/moses/FF/DistortionScoreProducer.h
+++ b/moses/FF/DistortionScoreProducer.h
@@ -33,7 +33,7 @@ class DistortionScoreProducer : public StatefulFeatureFunction
     const FFState* prev_state,
     ScoreComponentCollection* accumulator) const;
 
-  virtual FFState* EvaluateChart(
+  virtual FFState* EvaluateWhenApplied(
     const ChartHypothesis& /* cur_hypo */,
     int /* featureID - used to index the state in the previous hypotheses */,
     ScoreComponentCollection*) const {
diff --git a/moses/FF/ExternalFeature.cpp b/moses/FF/ExternalFeature.cpp
index 8fe6125441..10800d24df 100644
--- a/moses/FF/ExternalFeature.cpp
+++ b/moses/FF/ExternalFeature.cpp
@@ -59,7 +59,7 @@ FFState* ExternalFeature::EvaluateWhenApplied(
   return new ExternalFeatureState(m_stateSize);
 }
 
-FFState* ExternalFeature::EvaluateChart(
+FFState* ExternalFeature::EvaluateWhenApplied(
   const ChartHypothesis& /* cur_hypo */,
   int /* featureID - used to index the state in the previous hypotheses */,
   ScoreComponentCollection* accumulator) const
diff --git a/moses/FF/ExternalFeature.h b/moses/FF/ExternalFeature.h
index f845842a5c..a8916a853f 100644
--- a/moses/FF/ExternalFeature.h
+++ b/moses/FF/ExternalFeature.h
@@ -68,7 +68,7 @@ class ExternalFeature : public StatefulFeatureFunction
     const FFState* prev_state,
     ScoreComponentCollection* accumulator) const;
 
-  FFState* EvaluateChart(
+  FFState* EvaluateWhenApplied(
     const ChartHypothesis& /* cur_hypo */,
     int /* featureID - used to index the state in the previous hypotheses */,
     ScoreComponentCollection* accumulator) const;
diff --git a/moses/FF/GlobalLexicalModel.h b/moses/FF/GlobalLexicalModel.h
index 1af2e79e1a..151dbf4724 100644
--- a/moses/FF/GlobalLexicalModel.h
+++ b/moses/FF/GlobalLexicalModel.h
@@ -74,7 +74,7 @@ class GlobalLexicalModel : public StatelessFeatureFunction
                 ScoreComponentCollection* accumulator) const;
 
 
-  void EvaluateChart(
+  void EvaluateWhenApplied(
     const ChartHypothesis& hypo,
     ScoreComponentCollection* accumulator) const {
     throw std::logic_error("GlobalLexicalModel not supported in chart decoder, yet");
diff --git a/moses/FF/GlobalLexicalModelUnlimited.h b/moses/FF/GlobalLexicalModelUnlimited.h
index 688cc607cb..096254613c 100644
--- a/moses/FF/GlobalLexicalModelUnlimited.h
+++ b/moses/FF/GlobalLexicalModelUnlimited.h
@@ -84,7 +84,7 @@ class GlobalLexicalModelUnlimited : public StatelessFeatureFunction
   void EvaluateWhenApplied(const Hypothesis& cur_hypo,
                 ScoreComponentCollection* accumulator) const;
 
-  void EvaluateChart(const ChartHypothesis& /* cur_hypo */,
+  void EvaluateWhenApplied(const ChartHypothesis& /* cur_hypo */,
                      int /* featureID */,
                      ScoreComponentCollection* ) const {
     throw std::logic_error("GlobalLexicalModelUnlimited not supported in chart decoder, yet");
diff --git a/moses/FF/HyperParameterAsWeight.h b/moses/FF/HyperParameterAsWeight.h
index 4ed181431f..aaad21c14a 100644
--- a/moses/FF/HyperParameterAsWeight.h
+++ b/moses/FF/HyperParameterAsWeight.h
@@ -38,7 +38,7 @@ class HyperParameterAsWeight : public StatelessFeatureFunction
   /**
     * Same for chart-based features.
     **/
-  virtual void EvaluateChart(const ChartHypothesis &hypo,
+  virtual void EvaluateWhenApplied(const ChartHypothesis &hypo,
                              ScoreComponentCollection* accumulator) const
   {}
 
diff --git a/moses/FF/InputFeature.h b/moses/FF/InputFeature.h
index 7193d90d37..ad4fe398a5 100644
--- a/moses/FF/InputFeature.h
+++ b/moses/FF/InputFeature.h
@@ -57,7 +57,7 @@ class InputFeature : public StatelessFeatureFunction
   void EvaluateWhenApplied(const Hypothesis& hypo,
                 ScoreComponentCollection* accumulator) const
   {}
-  void EvaluateChart(const ChartHypothesis &hypo,
+  void EvaluateWhenApplied(const ChartHypothesis &hypo,
                      ScoreComponentCollection* accumulator) const
   {}
 
diff --git a/moses/FF/InternalStructStatelessFF.h b/moses/FF/InternalStructStatelessFF.h
index 821dfb826a..2ed8801e20 100644
--- a/moses/FF/InternalStructStatelessFF.h
+++ b/moses/FF/InternalStructStatelessFF.h
@@ -30,7 +30,7 @@ class InternalStructStatelessFF : public StatelessFeatureFunction
 	  virtual void EvaluateWhenApplied(const Hypothesis& hypo,
 	                        ScoreComponentCollection* accumulator) const
 	  {}
-	  void EvaluateChart(const ChartHypothesis &hypo,
+	  void EvaluateWhenApplied(const ChartHypothesis &hypo,
 	                             ScoreComponentCollection* accumulator) const
 	  {}
 
diff --git a/moses/FF/LexicalReordering/LexicalReordering.h b/moses/FF/LexicalReordering/LexicalReordering.h
index b6610639ea..09d3b73cc6 100644
--- a/moses/FF/LexicalReordering/LexicalReordering.h
+++ b/moses/FF/LexicalReordering/LexicalReordering.h
@@ -49,7 +49,7 @@ class LexicalReordering : public StatefulFeatureFunction
                             const FFState* prev_state,
                             ScoreComponentCollection* accumulator) const;
 
-  virtual FFState* EvaluateChart(const ChartHypothesis&,
+  virtual FFState* EvaluateWhenApplied(const ChartHypothesis&,
                                  int /* featureID */,
                                  ScoreComponentCollection*) const {
     UTIL_THROW(util::Exception, "LexicalReordering is not valid for chart decoder");
diff --git a/moses/FF/MaxSpanFreeNonTermSource.h b/moses/FF/MaxSpanFreeNonTermSource.h
index df5b98417b..973b374d8e 100644
--- a/moses/FF/MaxSpanFreeNonTermSource.h
+++ b/moses/FF/MaxSpanFreeNonTermSource.h
@@ -31,7 +31,7 @@ class MaxSpanFreeNonTermSource : public StatelessFeatureFunction
 	                        ScoreComponentCollection* accumulator) const
 	  {}
 
-	  virtual void EvaluateChart(const ChartHypothesis &hypo,
+	  virtual void EvaluateWhenApplied(const ChartHypothesis &hypo,
 	                             ScoreComponentCollection* accumulator) const
 	  {}
 
diff --git a/moses/FF/NieceTerminal.cpp b/moses/FF/NieceTerminal.cpp
index 921a95cbe3..b3a5f8f922 100644
--- a/moses/FF/NieceTerminal.cpp
+++ b/moses/FF/NieceTerminal.cpp
@@ -75,7 +75,7 @@ void NieceTerminal::EvaluateWhenApplied(const Hypothesis& hypo,
                                    ScoreComponentCollection* accumulator) const
 {}
 
-void NieceTerminal::EvaluateChart(const ChartHypothesis &hypo,
+void NieceTerminal::EvaluateWhenApplied(const ChartHypothesis &hypo,
                                         ScoreComponentCollection* accumulator) const
 {}
 
diff --git a/moses/FF/NieceTerminal.h b/moses/FF/NieceTerminal.h
index 93e55d6347..7daf2963e6 100644
--- a/moses/FF/NieceTerminal.h
+++ b/moses/FF/NieceTerminal.h
@@ -31,7 +31,7 @@ class NieceTerminal : public StatelessFeatureFunction
                 , ScoreComponentCollection *estimatedFutureScore = NULL) const;
   void EvaluateWhenApplied(const Hypothesis& hypo,
                 ScoreComponentCollection* accumulator) const;
-  void EvaluateChart(const ChartHypothesis &hypo,
+  void EvaluateWhenApplied(const ChartHypothesis &hypo,
                      ScoreComponentCollection* accumulator) const;
 
   void SetParameter(const std::string& key, const std::string& value);
diff --git a/moses/FF/OSM-Feature/OpSequenceModel.cpp b/moses/FF/OSM-Feature/OpSequenceModel.cpp
index bc245d988b..7939421513 100644
--- a/moses/FF/OSM-Feature/OpSequenceModel.cpp
+++ b/moses/FF/OSM-Feature/OpSequenceModel.cpp
@@ -194,7 +194,7 @@ FFState* OpSequenceModel::EvaluateWhenApplied(
 // return NULL;
 }
 
-FFState* OpSequenceModel::EvaluateChart(
+FFState* OpSequenceModel::EvaluateWhenApplied(
   const ChartHypothesis& /* cur_hypo */,
   int /* featureID - used to index the state in the previous hypotheses */,
   ScoreComponentCollection* accumulator) const
diff --git a/moses/FF/OSM-Feature/OpSequenceModel.h b/moses/FF/OSM-Feature/OpSequenceModel.h
index 7dbe2e0ca0..c4d26f98ef 100644
--- a/moses/FF/OSM-Feature/OpSequenceModel.h
+++ b/moses/FF/OSM-Feature/OpSequenceModel.h
@@ -34,7 +34,7 @@ class OpSequenceModel : public StatefulFeatureFunction
     const FFState* prev_state,
     ScoreComponentCollection* accumulator) const;
 
-  virtual FFState* EvaluateChart(
+  virtual FFState* EvaluateWhenApplied(
     const ChartHypothesis& /* cur_hypo */,
     int /* featureID - used to index the state in the previous hypotheses */,
     ScoreComponentCollection* accumulator) const;
diff --git a/moses/FF/PhraseBoundaryFeature.h b/moses/FF/PhraseBoundaryFeature.h
index 9aec700dcb..e4c3ca3bab 100644
--- a/moses/FF/PhraseBoundaryFeature.h
+++ b/moses/FF/PhraseBoundaryFeature.h
@@ -47,7 +47,7 @@ class PhraseBoundaryFeature : public StatefulFeatureFunction
   virtual FFState* EvaluateWhenApplied(const Hypothesis& cur_hypo, const FFState* prev_state,
                             ScoreComponentCollection* accumulator) const;
 
-  virtual FFState* EvaluateChart( const ChartHypothesis& /* cur_hypo */,
+  virtual FFState* EvaluateWhenApplied( const ChartHypothesis& /* cur_hypo */,
                                   int /* featureID */,
                                   ScoreComponentCollection* ) const {
     throw std::logic_error("PhraseBoundaryState not supported in chart decoder, yet");
diff --git a/moses/FF/PhraseLengthFeature.h b/moses/FF/PhraseLengthFeature.h
index cf26d9a20b..4976e22100 100644
--- a/moses/FF/PhraseLengthFeature.h
+++ b/moses/FF/PhraseLengthFeature.h
@@ -28,7 +28,7 @@ class PhraseLengthFeature : public StatelessFeatureFunction
                 ScoreComponentCollection* accumulator) const
   {}
 
-  void EvaluateChart(const ChartHypothesis& hypo,
+  void EvaluateWhenApplied(const ChartHypothesis& hypo,
                      ScoreComponentCollection*) const {
     throw std::logic_error("PhraseLengthFeature not valid in chart decoder");
   }
diff --git a/moses/FF/PhrasePairFeature.h b/moses/FF/PhrasePairFeature.h
index 94bf35af3e..8bfac628d1 100644
--- a/moses/FF/PhrasePairFeature.h
+++ b/moses/FF/PhrasePairFeature.h
@@ -40,7 +40,7 @@ class PhrasePairFeature: public StatelessFeatureFunction
   void EvaluateWhenApplied(const Hypothesis& hypo,
                 ScoreComponentCollection* accumulator) const;
 
-  void EvaluateChart(const ChartHypothesis& hypo,
+  void EvaluateWhenApplied(const ChartHypothesis& hypo,
                      ScoreComponentCollection*) const {
     throw std::logic_error("PhrasePairFeature not valid in chart decoder");
   }
diff --git a/moses/FF/PhrasePenalty.h b/moses/FF/PhrasePenalty.h
index 2babc7d676..f822e583b8 100644
--- a/moses/FF/PhrasePenalty.h
+++ b/moses/FF/PhrasePenalty.h
@@ -22,7 +22,7 @@ class PhrasePenalty : public StatelessFeatureFunction
   void EvaluateWhenApplied(const Hypothesis& hypo,
                 ScoreComponentCollection* accumulator) const
   {}
-  void EvaluateChart(const ChartHypothesis &hypo,
+  void EvaluateWhenApplied(const ChartHypothesis &hypo,
                      ScoreComponentCollection* accumulator) const
   {}
 
diff --git a/moses/FF/ReferenceComparison.h b/moses/FF/ReferenceComparison.h
index 571242ce43..62cf15ced5 100644
--- a/moses/FF/ReferenceComparison.h
+++ b/moses/FF/ReferenceComparison.h
@@ -33,7 +33,7 @@ class ReferenceComparison : public StatelessFeatureFunction
 	                        ScoreComponentCollection* accumulator) const
 	  {}
 
-	  virtual void EvaluateChart(const ChartHypothesis &hypo,
+	  virtual void EvaluateWhenApplied(const ChartHypothesis &hypo,
 	                             ScoreComponentCollection* accumulator) const
 	  {}
 
diff --git a/moses/FF/RuleScope.h b/moses/FF/RuleScope.h
index a051e411ad..a2c9e06f36 100644
--- a/moses/FF/RuleScope.h
+++ b/moses/FF/RuleScope.h
@@ -31,7 +31,7 @@ class RuleScope : public StatelessFeatureFunction
 	                        ScoreComponentCollection* accumulator) const
 	  {}
 
-	  virtual void EvaluateChart(const ChartHypothesis &hypo,
+	  virtual void EvaluateWhenApplied(const ChartHypothesis &hypo,
 	                             ScoreComponentCollection* accumulator) const
 	  {}
 
diff --git a/moses/FF/SetSourcePhrase.h b/moses/FF/SetSourcePhrase.h
index 3f5bc82868..81f293dde5 100644
--- a/moses/FF/SetSourcePhrase.h
+++ b/moses/FF/SetSourcePhrase.h
@@ -31,7 +31,7 @@ class SetSourcePhrase : public StatelessFeatureFunction
                         ScoreComponentCollection* accumulator) const
   {}
 
-  virtual void EvaluateChart(const ChartHypothesis &hypo,
+  virtual void EvaluateWhenApplied(const ChartHypothesis &hypo,
                              ScoreComponentCollection* accumulator) const
   {}
 
diff --git a/moses/FF/SkeletonStatefulFF.cpp b/moses/FF/SkeletonStatefulFF.cpp
index 0d1a0f9118..fe81aeeae1 100644
--- a/moses/FF/SkeletonStatefulFF.cpp
+++ b/moses/FF/SkeletonStatefulFF.cpp
@@ -56,7 +56,7 @@ FFState* SkeletonStatefulFF::EvaluateWhenApplied(
   return new SkeletonState(0);
 }
 
-FFState* SkeletonStatefulFF::EvaluateChart(
+FFState* SkeletonStatefulFF::EvaluateWhenApplied(
   const ChartHypothesis& /* cur_hypo */,
   int /* featureID - used to index the state in the previous hypotheses */,
   ScoreComponentCollection* accumulator) const
diff --git a/moses/FF/SkeletonStatefulFF.h b/moses/FF/SkeletonStatefulFF.h
index fd93bce55a..6fa26803eb 100644
--- a/moses/FF/SkeletonStatefulFF.h
+++ b/moses/FF/SkeletonStatefulFF.h
@@ -44,7 +44,7 @@ class SkeletonStatefulFF : public StatefulFeatureFunction
     const Hypothesis& cur_hypo,
     const FFState* prev_state,
     ScoreComponentCollection* accumulator) const;
-  FFState* EvaluateChart(
+  FFState* EvaluateWhenApplied(
     const ChartHypothesis& /* cur_hypo */,
     int /* featureID - used to index the state in the previous hypotheses */,
     ScoreComponentCollection* accumulator) const;
diff --git a/moses/FF/SkeletonStatelessFF.cpp b/moses/FF/SkeletonStatelessFF.cpp
index 446d57b932..80c7d130ed 100644
--- a/moses/FF/SkeletonStatelessFF.cpp
+++ b/moses/FF/SkeletonStatelessFF.cpp
@@ -48,7 +48,7 @@ void SkeletonStatelessFF::EvaluateWhenApplied(const Hypothesis& hypo,
                                    ScoreComponentCollection* accumulator) const
 {}
 
-void SkeletonStatelessFF::EvaluateChart(const ChartHypothesis &hypo,
+void SkeletonStatelessFF::EvaluateWhenApplied(const ChartHypothesis &hypo,
                                         ScoreComponentCollection* accumulator) const
 {}
 
diff --git a/moses/FF/SkeletonStatelessFF.h b/moses/FF/SkeletonStatelessFF.h
index 5d772b398c..520ec14058 100644
--- a/moses/FF/SkeletonStatelessFF.h
+++ b/moses/FF/SkeletonStatelessFF.h
@@ -27,7 +27,7 @@ class SkeletonStatelessFF : public StatelessFeatureFunction
                 , ScoreComponentCollection *estimatedFutureScore = NULL) const;
   void EvaluateWhenApplied(const Hypothesis& hypo,
                 ScoreComponentCollection* accumulator) const;
-  void EvaluateChart(const ChartHypothesis &hypo,
+  void EvaluateWhenApplied(const ChartHypothesis &hypo,
                      ScoreComponentCollection* accumulator) const;
 
   void SetParameter(const std::string& key, const std::string& value);
diff --git a/moses/FF/SoftMatchingFeature.cpp b/moses/FF/SoftMatchingFeature.cpp
index 017e551c41..0475547daa 100644
--- a/moses/FF/SoftMatchingFeature.cpp
+++ b/moses/FF/SoftMatchingFeature.cpp
@@ -61,7 +61,7 @@ bool SoftMatchingFeature::Load(const std::string& filePath)
    return true;
 }
 
-void SoftMatchingFeature::EvaluateChart(const ChartHypothesis& hypo,
+void SoftMatchingFeature::EvaluateWhenApplied(const ChartHypothesis& hypo,
                              ScoreComponentCollection* accumulator) const
 {
 
diff --git a/moses/FF/SoftMatchingFeature.h b/moses/FF/SoftMatchingFeature.h
index 37568325f1..ff923ea082 100644
--- a/moses/FF/SoftMatchingFeature.h
+++ b/moses/FF/SoftMatchingFeature.h
@@ -19,7 +19,7 @@ class SoftMatchingFeature : public StatelessFeatureFunction
     return true;
   }
 
-  virtual void EvaluateChart(const ChartHypothesis& hypo,
+  virtual void EvaluateWhenApplied(const ChartHypothesis& hypo,
                              ScoreComponentCollection* accumulator) const;
 
   void EvaluateInIsolation(const Phrase &source
diff --git a/moses/FF/SourceGHKMTreeInputMatchFeature.h b/moses/FF/SourceGHKMTreeInputMatchFeature.h
index ef9155f489..743871b1c3 100644
--- a/moses/FF/SourceGHKMTreeInputMatchFeature.h
+++ b/moses/FF/SourceGHKMTreeInputMatchFeature.h
@@ -32,7 +32,7 @@ class SourceGHKMTreeInputMatchFeature : public StatelessFeatureFunction
   void EvaluateWhenApplied(const Hypothesis& hypo,
                 ScoreComponentCollection* accumulator) const {};
 
-  void EvaluateChart(const ChartHypothesis &hypo,
+  void EvaluateWhenApplied(const ChartHypothesis &hypo,
                      ScoreComponentCollection* accumulator) const {};
 
 };
diff --git a/moses/FF/SourceWordDeletionFeature.h b/moses/FF/SourceWordDeletionFeature.h
index a2fec0f85c..8211ef0ca1 100644
--- a/moses/FF/SourceWordDeletionFeature.h
+++ b/moses/FF/SourceWordDeletionFeature.h
@@ -42,7 +42,7 @@ class SourceWordDeletionFeature : public StatelessFeatureFunction
   void EvaluateWhenApplied(const Hypothesis& hypo,
                 ScoreComponentCollection* accumulator) const
   {}
-  void EvaluateChart(const ChartHypothesis &hypo,
+  void EvaluateWhenApplied(const ChartHypothesis &hypo,
                      ScoreComponentCollection* accumulator) const
   {}
 
diff --git a/moses/FF/SpanLength.h b/moses/FF/SpanLength.h
index 7792cc6d09..dc5564fcdf 100644
--- a/moses/FF/SpanLength.h
+++ b/moses/FF/SpanLength.h
@@ -31,7 +31,7 @@ class SpanLength : public StatelessFeatureFunction
 	                        ScoreComponentCollection* accumulator) const
 	  {}
 
-	  virtual void EvaluateChart(const ChartHypothesis &hypo,
+	  virtual void EvaluateWhenApplied(const ChartHypothesis &hypo,
 	                             ScoreComponentCollection* accumulator) const
 	  {}
 
diff --git a/moses/FF/SparseHieroReorderingFeature.cpp b/moses/FF/SparseHieroReorderingFeature.cpp
index f42f5de2fc..0c6ac47672 100644
--- a/moses/FF/SparseHieroReorderingFeature.cpp
+++ b/moses/FF/SparseHieroReorderingFeature.cpp
@@ -81,7 +81,7 @@ const Factor* SparseHieroReorderingFeature::GetFactor(const Word& word, const Vo
   return factor;
 }
 
-void SparseHieroReorderingFeature::EvaluateChart(
+void SparseHieroReorderingFeature::EvaluateWhenApplied(
   const ChartHypothesis&  cur_hypo ,
   ScoreComponentCollection* accumulator) const
 {
diff --git a/moses/FF/SparseHieroReorderingFeature.h b/moses/FF/SparseHieroReorderingFeature.h
index 45ff1884a2..d631fdec10 100644
--- a/moses/FF/SparseHieroReorderingFeature.h
+++ b/moses/FF/SparseHieroReorderingFeature.h
@@ -47,7 +47,7 @@ class SparseHieroReorderingFeature : public StatelessFeatureFunction
   virtual void EvaluateWhenApplied(const Hypothesis& hypo,
                         ScoreComponentCollection* accumulator) const
   {}
-  void EvaluateChart(const ChartHypothesis &hypo,
+  void EvaluateWhenApplied(const ChartHypothesis &hypo,
                              ScoreComponentCollection* accumulator) const;
 
 
diff --git a/moses/FF/StatefulFeatureFunction.h b/moses/FF/StatefulFeatureFunction.h
index bf47ec5cf0..86bed04eea 100644
--- a/moses/FF/StatefulFeatureFunction.h
+++ b/moses/FF/StatefulFeatureFunction.h
@@ -34,7 +34,7 @@ class StatefulFeatureFunction: public FeatureFunction
     const FFState* prev_state,
     ScoreComponentCollection* accumulator) const = 0;
 
-  virtual FFState* EvaluateChart(
+  virtual FFState* EvaluateWhenApplied(
     const ChartHypothesis& /* cur_hypo */,
     int /* featureID - used to index the state in the previous hypotheses */,
     ScoreComponentCollection* accumulator) const = 0;
diff --git a/moses/FF/StatelessFeatureFunction.h b/moses/FF/StatelessFeatureFunction.h
index e300ac8d32..94029f8827 100644
--- a/moses/FF/StatelessFeatureFunction.h
+++ b/moses/FF/StatelessFeatureFunction.h
@@ -29,7 +29,7 @@ class StatelessFeatureFunction: public FeatureFunction
   /**
     * Same for chart-based features.
     **/
-  virtual void EvaluateChart(const ChartHypothesis &hypo,
+  virtual void EvaluateWhenApplied(const ChartHypothesis &hypo,
                              ScoreComponentCollection* accumulator) const = 0;
 
   virtual bool IsStateless() const {
diff --git a/moses/FF/SyntaxRHS.cpp b/moses/FF/SyntaxRHS.cpp
index de886cd0cb..5168b72d7c 100644
--- a/moses/FF/SyntaxRHS.cpp
+++ b/moses/FF/SyntaxRHS.cpp
@@ -46,7 +46,7 @@ void SyntaxRHS::EvaluateWhenApplied(const Hypothesis& hypo,
                                    ScoreComponentCollection* accumulator) const
 {}
 
-void SyntaxRHS::EvaluateChart(const ChartHypothesis &hypo,
+void SyntaxRHS::EvaluateWhenApplied(const ChartHypothesis &hypo,
                                         ScoreComponentCollection* accumulator) const
 {}
 
diff --git a/moses/FF/SyntaxRHS.h b/moses/FF/SyntaxRHS.h
index ed455220bd..4b92149952 100644
--- a/moses/FF/SyntaxRHS.h
+++ b/moses/FF/SyntaxRHS.h
@@ -27,7 +27,7 @@ class SyntaxRHS : public StatelessFeatureFunction
                 , ScoreComponentCollection *estimatedFutureScore = NULL) const;
   void EvaluateWhenApplied(const Hypothesis& hypo,
                 ScoreComponentCollection* accumulator) const;
-  void EvaluateChart(const ChartHypothesis &hypo,
+  void EvaluateWhenApplied(const ChartHypothesis &hypo,
                      ScoreComponentCollection* accumulator) const;
 
 };
diff --git a/moses/FF/TargetBigramFeature.h b/moses/FF/TargetBigramFeature.h
index 6b26bb2695..c63f3caa41 100644
--- a/moses/FF/TargetBigramFeature.h
+++ b/moses/FF/TargetBigramFeature.h
@@ -42,7 +42,7 @@ class TargetBigramFeature : public StatefulFeatureFunction
   virtual FFState* EvaluateWhenApplied(const Hypothesis& cur_hypo, const FFState* prev_state,
                             ScoreComponentCollection* accumulator) const;
 
-  virtual FFState* EvaluateChart( const ChartHypothesis& /* cur_hypo */,
+  virtual FFState* EvaluateWhenApplied( const ChartHypothesis& /* cur_hypo */,
                                   int /* featureID */,
                                   ScoreComponentCollection* ) const {
     throw std::logic_error("TargetBigramFeature not valid in chart decoder");
diff --git a/moses/FF/TargetNgramFeature.cpp b/moses/FF/TargetNgramFeature.cpp
index a2fc4e0400..a434109902 100644
--- a/moses/FF/TargetNgramFeature.cpp
+++ b/moses/FF/TargetNgramFeature.cpp
@@ -207,7 +207,7 @@ void TargetNgramFeature::appendNgram(const Word& word, bool& skip, stringstream
   }
 }
 
-FFState* TargetNgramFeature::EvaluateChart(const ChartHypothesis& cur_hypo, int featureId, ScoreComponentCollection* accumulator) const
+FFState* TargetNgramFeature::EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureId, ScoreComponentCollection* accumulator) const
 {
   vector<const Word*> contextFactor;
   contextFactor.reserve(m_n);
diff --git a/moses/FF/TargetNgramFeature.h b/moses/FF/TargetNgramFeature.h
index 914538dd4b..e87252670a 100644
--- a/moses/FF/TargetNgramFeature.h
+++ b/moses/FF/TargetNgramFeature.h
@@ -189,7 +189,7 @@ class TargetNgramFeature : public StatefulFeatureFunction
   virtual FFState* EvaluateWhenApplied(const Hypothesis& cur_hypo, const FFState* prev_state,
                             ScoreComponentCollection* accumulator) const;
 
-  virtual FFState* EvaluateChart(const ChartHypothesis& cur_hypo, int featureId,
+  virtual FFState* EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureId,
                                  ScoreComponentCollection* accumulator) const;
 
   void EvaluateWithSourceContext(const InputType &input
diff --git a/moses/FF/TargetWordInsertionFeature.h b/moses/FF/TargetWordInsertionFeature.h
index 6d48e7a982..06fa25400a 100644
--- a/moses/FF/TargetWordInsertionFeature.h
+++ b/moses/FF/TargetWordInsertionFeature.h
@@ -42,7 +42,7 @@ class TargetWordInsertionFeature : public StatelessFeatureFunction
   void EvaluateWhenApplied(const Hypothesis& hypo,
                 ScoreComponentCollection* accumulator) const
   {}
-  void EvaluateChart(const ChartHypothesis &hypo,
+  void EvaluateWhenApplied(const ChartHypothesis &hypo,
                      ScoreComponentCollection* accumulator) const
   {}
 
diff --git a/moses/FF/TreeStructureFeature.cpp b/moses/FF/TreeStructureFeature.cpp
index a5446891ae..c0505edd6e 100644
--- a/moses/FF/TreeStructureFeature.cpp
+++ b/moses/FF/TreeStructureFeature.cpp
@@ -266,7 +266,7 @@ void TreeStructureFeature::AddNTLabels(TreePointer root) const {
       }
 }
 
-FFState* TreeStructureFeature::EvaluateChart(const ChartHypothesis& cur_hypo
+FFState* TreeStructureFeature::EvaluateWhenApplied(const ChartHypothesis& cur_hypo
                                    , int featureID /* used to index the state in the previous hypotheses */
                                    , ScoreComponentCollection* accumulator) const
 {
diff --git a/moses/FF/TreeStructureFeature.h b/moses/FF/TreeStructureFeature.h
index 100e378c65..a81d604bb1 100644
--- a/moses/FF/TreeStructureFeature.h
+++ b/moses/FF/TreeStructureFeature.h
@@ -166,7 +166,7 @@ class TreeStructureFeature : public StatefulFeatureFunction
     const Hypothesis& cur_hypo,
     const FFState* prev_state,
     ScoreComponentCollection* accumulator) const {UTIL_THROW(util::Exception, "Not implemented");};
-  FFState* EvaluateChart(
+  FFState* EvaluateWhenApplied(
     const ChartHypothesis& /* cur_hypo */,
     int /* featureID - used to index the state in the previous hypotheses */,
     ScoreComponentCollection* accumulator) const;
diff --git a/moses/FF/UnknownWordPenaltyProducer.h b/moses/FF/UnknownWordPenaltyProducer.h
index d1ac80a026..8850641e51 100644
--- a/moses/FF/UnknownWordPenaltyProducer.h
+++ b/moses/FF/UnknownWordPenaltyProducer.h
@@ -34,7 +34,7 @@ class UnknownWordPenaltyProducer : public StatelessFeatureFunction
   void EvaluateWhenApplied(const Hypothesis& hypo,
                 ScoreComponentCollection* accumulator) const
   {}
-  void EvaluateChart(const ChartHypothesis &hypo,
+  void EvaluateWhenApplied(const ChartHypothesis &hypo,
                      ScoreComponentCollection* accumulator) const
   {}
   void EvaluateWithSourceContext(const InputType &input
diff --git a/moses/FF/WordPenaltyProducer.h b/moses/FF/WordPenaltyProducer.h
index 5dc07c679c..e628773075 100644
--- a/moses/FF/WordPenaltyProducer.h
+++ b/moses/FF/WordPenaltyProducer.h
@@ -34,7 +34,7 @@ class WordPenaltyProducer : public StatelessFeatureFunction
   void EvaluateWhenApplied(const Hypothesis& hypo,
                 ScoreComponentCollection* accumulator) const
   {}
-  void EvaluateChart(const ChartHypothesis &hypo,
+  void EvaluateWhenApplied(const ChartHypothesis &hypo,
                      ScoreComponentCollection* accumulator) const
   {}
   void EvaluateWithSourceContext(const InputType &input
diff --git a/moses/FF/WordTranslationFeature.cpp b/moses/FF/WordTranslationFeature.cpp
index ed88c0e7b2..7a98ad4c8a 100644
--- a/moses/FF/WordTranslationFeature.cpp
+++ b/moses/FF/WordTranslationFeature.cpp
@@ -349,7 +349,7 @@ void WordTranslationFeature::EvaluateWhenApplied
   }
 }
 
-void WordTranslationFeature::EvaluateChart(
+void WordTranslationFeature::EvaluateWhenApplied(
   const ChartHypothesis &hypo,
   ScoreComponentCollection* accumulator) const
 {
diff --git a/moses/FF/WordTranslationFeature.h b/moses/FF/WordTranslationFeature.h
index 9de73eaef8..c213d8eb3e 100644
--- a/moses/FF/WordTranslationFeature.h
+++ b/moses/FF/WordTranslationFeature.h
@@ -51,7 +51,7 @@ class WordTranslationFeature : public StatelessFeatureFunction
   void EvaluateWhenApplied(const Hypothesis& hypo,
                 ScoreComponentCollection* accumulator) const;
 
-  void EvaluateChart(const ChartHypothesis &hypo,
+  void EvaluateWhenApplied(const ChartHypothesis &hypo,
                      ScoreComponentCollection* accumulator) const;
   void EvaluateWithSourceContext(const InputType &input
                 , const InputPath &inputPath
diff --git a/moses/Incremental.cpp b/moses/Incremental.cpp
index 4e593df7ef..c8a48d425a 100644
--- a/moses/Incremental.cpp
+++ b/moses/Incremental.cpp
@@ -327,7 +327,7 @@ void PhraseAndFeatures(const search::Applied final, Phrase &phrase, ScoreCompone
 
   const LanguageModel &model = LanguageModel::GetFirstLM();
   model.CalcScore(phrase, full, ignored_ngram, ignored_oov);
-  // CalcScore transforms, but EvaluateChart doesn't.
+  // CalcScore transforms, but EvaluateWhenApplied doesn't.
   features.Assign(&model, full);
 }
 
diff --git a/moses/LM/DALMWrapper.cpp b/moses/LM/DALMWrapper.cpp
index 943b4f3af5..68b3050de5 100644
--- a/moses/LM/DALMWrapper.cpp
+++ b/moses/LM/DALMWrapper.cpp
@@ -339,7 +339,7 @@ FFState *LanguageModelDALM::EvaluateWhenApplied(const Hypothesis &hypo, const FF
   return dalm_state;
 }
 
-FFState *LanguageModelDALM::EvaluateChart(const ChartHypothesis& hypo, int featureID, ScoreComponentCollection *out) const{
+FFState *LanguageModelDALM::EvaluateWhenApplied(const ChartHypothesis& hypo, int featureID, ScoreComponentCollection *out) const{
   // initialize language model context state
  	DALMChartState *newState = new DALMChartState();
 	DALM::State &state = newState->GetRightContext();
diff --git a/moses/LM/DALMWrapper.h b/moses/LM/DALMWrapper.h
index ae3618cf9d..ad53819c0a 100644
--- a/moses/LM/DALMWrapper.h
+++ b/moses/LM/DALMWrapper.h
@@ -36,7 +36,7 @@ class LanguageModelDALM : public LanguageModel
 
   virtual FFState *EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const;
 
-  virtual FFState *EvaluateChart(const ChartHypothesis& hypo, int featureID, ScoreComponentCollection *out) const;
+  virtual FFState *EvaluateWhenApplied(const ChartHypothesis& hypo, int featureID, ScoreComponentCollection *out) const;
 
   virtual bool IsUseable(const FactorMask &mask) const;
 
diff --git a/moses/LM/Implementation.cpp b/moses/LM/Implementation.cpp
index 35082c2b15..bd5bd18345 100644
--- a/moses/LM/Implementation.cpp
+++ b/moses/LM/Implementation.cpp
@@ -222,7 +222,7 @@ FFState *LanguageModelImplementation::EvaluateWhenApplied(const Hypothesis &hypo
   return res;
 }
 
-FFState* LanguageModelImplementation::EvaluateChart(const ChartHypothesis& hypo, int featureID, ScoreComponentCollection* out) const
+FFState* LanguageModelImplementation::EvaluateWhenApplied(const ChartHypothesis& hypo, int featureID, ScoreComponentCollection* out) const
 {
   LanguageModelChartState *ret = new LanguageModelChartState(hypo, featureID, GetNGramOrder());
   // data structure for factored context phrase (history and predicted word)
diff --git a/moses/LM/Implementation.h b/moses/LM/Implementation.h
index 6c8ee50a7b..5eb8fb2096 100644
--- a/moses/LM/Implementation.h
+++ b/moses/LM/Implementation.h
@@ -91,7 +91,7 @@ class LanguageModelImplementation : public LanguageModel
 
   FFState *EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const;
 
-  FFState* EvaluateChart(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection* accumulator) const;
+  FFState* EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection* accumulator) const;
 
   void updateChartScore(float *prefixScore, float *finalScore, float score, size_t wordPos) const;
 
diff --git a/moses/LM/Ken.cpp b/moses/LM/Ken.cpp
index 2674dbe8c7..e69746084a 100644
--- a/moses/LM/Ken.cpp
+++ b/moses/LM/Ken.cpp
@@ -79,7 +79,7 @@ struct KenLMState : public FFState {
 //
 //  FFState *Evaluate(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const;
 //
-//  FFState *EvaluateChart(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection *accumulator) const;
+//  FFState *EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection *accumulator) const;
 //
 //  void IncrementalCallback(Incremental::Manager &manager) const {
 //    manager.LMCallback(*m_ngram, m_lmIdLookup);
@@ -307,7 +307,7 @@ class LanguageModelChartStateKenLM : public FFState
   lm::ngram::ChartState m_state;
 };
 
-template <class Model> FFState *LanguageModelKen<Model>::EvaluateChart(const ChartHypothesis& hypo, int featureID, ScoreComponentCollection *accumulator) const
+template <class Model> FFState *LanguageModelKen<Model>::EvaluateWhenApplied(const ChartHypothesis& hypo, int featureID, ScoreComponentCollection *accumulator) const
 {
   LanguageModelChartStateKenLM *newState = new LanguageModelChartStateKenLM();
   lm::ngram::RuleScore<Model> ruleScore(*m_ngram, newState->GetChartState());
diff --git a/moses/LM/Ken.h b/moses/LM/Ken.h
index 931ba24129..2f473b697b 100644
--- a/moses/LM/Ken.h
+++ b/moses/LM/Ken.h
@@ -57,7 +57,7 @@ template <class Model> class LanguageModelKen : public LanguageModel
 
   virtual FFState *EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const;
 
-  virtual FFState *EvaluateChart(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection *accumulator) const;
+  virtual FFState *EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection *accumulator) const;
 
   virtual void IncrementalCallback(Incremental::Manager &manager) const;
   virtual void ReportHistoryOrder(std::ostream &out,const Phrase &phrase) const;
diff --git a/moses/LM/LDHT.cpp b/moses/LM/LDHT.cpp
index 61226208ca..1d0331df5d 100644
--- a/moses/LM/LDHT.cpp
+++ b/moses/LM/LDHT.cpp
@@ -97,7 +97,7 @@ class LanguageModelLDHT : public LanguageModel
   FFState* Evaluate(const Hypothesis& hypo,
                     const FFState* input_state,
                     ScoreComponentCollection* score_output) const;
-  FFState* EvaluateChart(const ChartHypothesis& hypo,
+  FFState* EvaluateWhenApplied(const ChartHypothesis& hypo,
                          int featureID,
                          ScoreComponentCollection* accumulator) const;
 
@@ -392,7 +392,7 @@ FFState* LanguageModelLDHT::Evaluate(
   return state;
 }
 
-FFState* LanguageModelLDHT::EvaluateChart(
+FFState* LanguageModelLDHT::EvaluateWhenApplied(
   const ChartHypothesis& hypo,
   int featureID,
   ScoreComponentCollection* accumulator) const
diff --git a/moses/ScoreComponentCollectionTest.cpp b/moses/ScoreComponentCollectionTest.cpp
index d1064c6aea..a238d66b87 100644
--- a/moses/ScoreComponentCollectionTest.cpp
+++ b/moses/ScoreComponentCollectionTest.cpp
@@ -35,7 +35,7 @@ class MockStatelessFeatureFunction : public StatelessFeatureFunction
   MockStatelessFeatureFunction(size_t n, const string &line) :
     StatelessFeatureFunction(n, line) {}
   void EvaluateWhenApplied(const Hypothesis&, ScoreComponentCollection*) const {}
-  void EvaluateChart(const ChartHypothesis&, ScoreComponentCollection*) const {}
+  void EvaluateWhenApplied(const ChartHypothesis&, ScoreComponentCollection*) const {}
   void EvaluateWithSourceContext(const InputType &input
                 , const InputPath &inputPath
                 , const TargetPhrase &targetPhrase
diff --git a/moses/SyntacticLanguageModel.h b/moses/SyntacticLanguageModel.h
index 6e88d85c1e..76882a4d15 100644
--- a/moses/SyntacticLanguageModel.h
+++ b/moses/SyntacticLanguageModel.h
@@ -30,7 +30,7 @@ class SyntacticLanguageModel : public StatefulFeatureFunction
                     const FFState* prev_state,
                     ScoreComponentCollection* accumulator) const;
 
-  FFState* EvaluateChart(const ChartHypothesis& cur_hypo,
+  FFState* EvaluateWhenApplied(const ChartHypothesis& cur_hypo,
                          int featureID,
                          ScoreComponentCollection* accumulator) const {
     throw std::runtime_error("Syntactic LM can only be used with phrase-based decoder.");

From 98d464727b3312bb23ce57d0eb83d6d1f3e5ddb6 Mon Sep 17 00:00:00 2001
From: Hieu Hoang <hieuhoang@gmail.com>
Date: Thu, 10 Jul 2014 11:40:32 +0100
Subject: [PATCH 40/84] add LBLLM from Blunsom's group

---
 contrib/other-builds/moses/.project |  10 +++
 moses/FF/Factory.cpp                |   2 +
 moses/FF/LBLLM.cpp                  | 131 ++++++++++++++++++++++++++++
 moses/FF/LBLLM.h                    |  65 ++++++++++++++
 4 files changed, 208 insertions(+)
 create mode 100644 moses/FF/LBLLM.cpp
 create mode 100644 moses/FF/LBLLM.h

diff --git a/contrib/other-builds/moses/.project b/contrib/other-builds/moses/.project
index 1c22fca311..120323a085 100644
--- a/contrib/other-builds/moses/.project
+++ b/contrib/other-builds/moses/.project
@@ -1156,6 +1156,16 @@
 			<type>1</type>
 			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/InternalStructStatelessFF.h</locationURI>
 		</link>
+		<link>
+			<name>FF/LBLLM.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/LBLLM.cpp</locationURI>
+		</link>
+		<link>
+			<name>FF/LBLLM.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/LBLLM.h</locationURI>
+		</link>
 		<link>
 			<name>FF/LexicalReordering</name>
 			<type>2</type>
diff --git a/moses/FF/Factory.cpp b/moses/FF/Factory.cpp
index c9a7ef8fc7..38fbccc890 100644
--- a/moses/FF/Factory.cpp
+++ b/moses/FF/Factory.cpp
@@ -48,6 +48,7 @@
 #include "NieceTerminal.h"
 #include "SpanLength.h"
 #include "SyntaxRHS.h"
+#include "LBLLM.h"
 
 #include "moses/FF/SkeletonStatelessFF.h"
 #include "moses/FF/SkeletonStatefulFF.h"
@@ -204,6 +205,7 @@ FeatureRegistry::FeatureRegistry()
   MOSES_FNAME(SparseHieroReorderingFeature);
   MOSES_FNAME(SpanLength);
   MOSES_FNAME(SyntaxRHS);
+  MOSES_FNAME(LBLLM);
 
   MOSES_FNAME(SkeletonStatelessFF);
   MOSES_FNAME(SkeletonStatefulFF);
diff --git a/moses/FF/LBLLM.cpp b/moses/FF/LBLLM.cpp
new file mode 100644
index 0000000000..c57d19acc0
--- /dev/null
+++ b/moses/FF/LBLLM.cpp
@@ -0,0 +1,131 @@
+#include <vector>
+#include <boost/archive/text_iarchive.hpp>
+#include "LBLLM.h"
+#include "moses/ScoreComponentCollection.h"
+#include "moses/Hypothesis.h"
+
+using namespace std;
+
+namespace Moses
+{
+int LBLLMState::Compare(const FFState& other) const
+{
+  const LBLLMState &otherState = static_cast<const LBLLMState&>(other);
+  // Three-way ordering on m_targetLen alone (unchecked downcast above): 0 if equal, -1 if this is smaller, +1 otherwise.
+  if (m_targetLen == otherState.m_targetLen)
+    return 0;
+  return (m_targetLen < otherState.m_targetLen) ? -1 : +1;
+}
+
+////////////////////////////////////////////////////////////////
+LBLLM::LBLLM(const std::string &line)
+  :StatefulFeatureFunction(3, line)
+{
+  ReadParameters();
+}
+
+void LBLLM::Load()
+{
+	{
+	  cerr << "Reading LM from " << m_lmPath << " ...\n";
+	  //ifstream ifile(lm_file.c_str(), ios::in | ios::binary);
+	  ifstream ifile(m_lmPath.c_str(), ios::in);
+	  if (!ifile.good()) {
+	    cerr << "Failed to open " << m_lmPath << " for reading\n";
+	    abort();
+	  }
+	  boost::archive::text_iarchive ia(ifile);
+	  ia >> lm;
+	  dict = lm.label_set();
+	}
+    /*
+    {
+      ifstream z_ifile((lm_file+".z").c_str(), ios::in);
+      if (!z_ifile.good()) {
+        cerr << "Failed to open " << (lm_file+".z") << " for reading\n";
+        abort();
+      }
+      cerr << "Reading LM Z from " << lm_file+".z" << " ...\n";
+      boost::archive::text_iarchive ia(z_ifile);
+      ia >> z_approx;
+    }
+    */
+
+    cerr << "Initializing map contents (map size=" << dict.max() << ")\n";
+    for (int i = 1; i < dict.max(); ++i)
+      AddToWordMap(i);
+    cerr << "Done.\n";
+    ss_off = OrderToStateSize(kORDER)-1;  // offset of "state size" member
+    FeatureFunction::SetStateSize(OrderToStateSize(kORDER));
+    kSTART = dict.Convert("<s>");
+    kSTOP = dict.Convert("</s>");
+    kUNKNOWN = dict.Convert("_UNK_");
+    kNONE = -1;
+    kSTAR = dict.Convert("<{STAR}>");
+    last_id = 0;
+
+    // optional online "adaptation" by training on previous references
+    if (reffile.size()) {
+      cerr << "Reference file: " << reffile << endl;
+      set<WordID> rv;
+      oxlm::ReadFromFile(reffile, &dict, &ref_sents, &rv);
+    }
+
+}
+
+void LBLLM::EvaluateInIsolation(const Phrase &source
+                                  , const TargetPhrase &targetPhrase
+                                  , ScoreComponentCollection &scoreBreakdown
+                                  , ScoreComponentCollection &estimatedFutureScore) const
+{}
+
+void LBLLM::EvaluateWithSourceContext(const InputType &input
+                                  , const InputPath &inputPath
+                                  , const TargetPhrase &targetPhrase
+                                  , const StackVec *stackVec
+                                  , ScoreComponentCollection &scoreBreakdown
+                                  , ScoreComponentCollection *estimatedFutureScore) const
+{}
+
+FFState* LBLLM::EvaluateWhenApplied(
+  const Hypothesis& cur_hypo,
+  const FFState* prev_state,
+  ScoreComponentCollection* accumulator) const
+{
+  // dense scores
+  vector<float> newScores(m_numScoreComponents);
+  newScores[0] = 1.5;
+  newScores[1] = 0.3;
+  newScores[2] = 0.4;
+  accumulator->PlusEquals(this, newScores);
+
+  // sparse scores
+  accumulator->PlusEquals(this, "sparse-name", 2.4);
+
+  // int targetLen = cur_hypo.GetCurrTargetPhrase().GetSize(); // ??? [UG]
+  return new LBLLMState(0);
+}
+
+FFState* LBLLM::EvaluateWhenApplied(
+  const ChartHypothesis& /* cur_hypo */,
+  int /* featureID - used to index the state in the previous hypotheses */,
+  ScoreComponentCollection* accumulator) const
+{
+  return new LBLLMState(0);
+}
+
+void LBLLM::SetParameter(const std::string& key, const std::string& value)
+{
+  if (key == "lm-file") {
+	  m_lmPath = value;
+  }
+  else if (key == "ref-file") {
+	  m_refPath = value;
+  }
+  else {
+    StatefulFeatureFunction::SetParameter(key, value);
+  }
+}
+
+}
+
diff --git a/moses/FF/LBLLM.h b/moses/FF/LBLLM.h
new file mode 100644
index 0000000000..3a667a0f87
--- /dev/null
+++ b/moses/FF/LBLLM.h
@@ -0,0 +1,65 @@
+#pragma once
+
+#include <string>
+#include "StatefulFeatureFunction.h"
+#include "FFState.h"
+#include "lbl/nlm.h"
+
+namespace Moses
+{
+
+class LBLLMState : public FFState  // feature-function state for LBLLM; holds a single target length
+{
+  int m_targetLen;  // the only value Compare() looks at
+public:
+  LBLLMState(int targetLen)
+    :m_targetLen(targetLen)  // store the argument: it was previously dropped, leaving Compare() to read an indeterminate value
+  {}
+  int Compare(const FFState& other) const;  // three-way comparison on m_targetLen; defined in LBLLM.cpp
+};
+
+class LBLLM : public StatefulFeatureFunction
+{
+public:
+  LBLLM(const std::string &line);
+  void Load();
+
+  bool IsUseable(const FactorMask &mask) const {
+    return true;
+  }
+  virtual const FFState* EmptyHypothesisState(const InputType &input) const {
+    return new LBLLMState(0);
+  }
+
+  void EvaluateInIsolation(const Phrase &source
+                , const TargetPhrase &targetPhrase
+                , ScoreComponentCollection &scoreBreakdown
+                , ScoreComponentCollection &estimatedFutureScore) const;
+  void EvaluateWithSourceContext(const InputType &input
+                , const InputPath &inputPath
+                , const TargetPhrase &targetPhrase
+                , const StackVec *stackVec
+                , ScoreComponentCollection &scoreBreakdown
+                , ScoreComponentCollection *estimatedFutureScore = NULL) const;
+  FFState* EvaluateWhenApplied(
+    const Hypothesis& cur_hypo,
+    const FFState* prev_state,
+    ScoreComponentCollection* accumulator) const;
+  FFState* EvaluateWhenApplied(
+    const ChartHypothesis& /* cur_hypo */,
+    int /* featureID - used to index the state in the previous hypotheses */,
+    ScoreComponentCollection* accumulator) const;
+
+  void SetParameter(const std::string& key, const std::string& value);
+
+protected:
+  std::string m_lmPath, m_refPath;
+
+  oxlm::Dict dict;
+  oxlm::FactoredOutputNLM lm;
+
+};
+
+
+}
+

From 5b2bafde7613fc8bbbf6e88563fc056f221535fa Mon Sep 17 00:00:00 2001
From: Hieu Hoang <hieuhoang@gmail.com>
Date: Thu, 10 Jul 2014 12:16:38 +0100
Subject: [PATCH 41/84] compiles with eclipse

---
 contrib/other-builds/moses/.cproject | 11 ++++-----
 contrib/other-builds/moses/.project  | 20 ++++++++--------
 moses/FF/Factory.cpp                 |  9 +++++--
 moses/{FF => LM}/LBLLM.cpp           | 35 ++--------------------------
 moses/{FF => LM}/LBLLM.h             |  7 ++++--
 5 files changed, 29 insertions(+), 53 deletions(-)
 rename moses/{FF => LM}/LBLLM.cpp (70%)
 rename moses/{FF => LM}/LBLLM.h (93%)

diff --git a/contrib/other-builds/moses/.cproject b/contrib/other-builds/moses/.cproject
index 409adfcc57..1a06fe7a8b 100644
--- a/contrib/other-builds/moses/.cproject
+++ b/contrib/other-builds/moses/.cproject
@@ -41,6 +41,8 @@
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../DALM/include&quot;"/>
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../DALM/darts-clone&quot;"/>
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../randlm/include/RandLM&quot;"/>
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../oxlm/src&quot;"/>
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../eigen-3&quot;"/>
 									<listOptionValue builtIn="false" value="${workspace_loc}/../../"/>
 								</option>
 								<option id="gnu.cpp.compiler.option.preprocessor.def.752586397" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
@@ -53,11 +55,11 @@
 									<listOptionValue builtIn="false" value="TRACE_ENABLE"/>
 									<listOptionValue builtIn="false" value="LM_IRST"/>
 									<listOptionValue builtIn="false" value="LM_DALM"/>
-									<listOptionValue builtIn="false" value="LM_RAND"/>
 									<listOptionValue builtIn="false" value="LM_NPLM"/>
 									<listOptionValue builtIn="false" value="_FILE_OFFSET_BIT=64"/>
 									<listOptionValue builtIn="false" value="_LARGE_FILES"/>
 								</option>
+								<option id="gnu.cpp.compiler.option.dialect.std.1734198568" name="Language standard" superClass="gnu.cpp.compiler.option.dialect.std" value="gnu.cpp.compiler.dialect.c++11" valueType="enumerated"/>
 								<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1905116220" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
 							</tool>
 							<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.2126314903" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug">
@@ -78,13 +80,10 @@
 						</toolChain>
 					</folderInfo>
 					<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.511477442" name="Rand.h" rcbsApplicability="disable" resourcePath="LM/Rand.h" toolsToInvoke=""/>
-					<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.1211280539" name="DALMWrapper.h" rcbsApplicability="disable" resourcePath="LM/DALMWrapper.h" toolsToInvoke=""/>
 					<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.790052015" name="IRST.h" rcbsApplicability="disable" resourcePath="LM/IRST.h" toolsToInvoke=""/>
-					<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.887148589" name="FuzzyMatchWrapper.cpp" rcbsApplicability="disable" resourcePath="TranslationModel/fuzzy-match/FuzzyMatchWrapper.cpp" toolsToInvoke="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1774992327.1298504775">
-						<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1774992327.1298504775" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1774992327"/>
-					</fileInfo>
+					<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.1211280539" name="DALMWrapper.h" rcbsApplicability="disable" resourcePath="LM/DALMWrapper.h" toolsToInvoke=""/>
 					<sourceEntries>
-						<entry excluding="TranslationModel/CompactPT|LM/NeuralLMWrapper.cpp|FF/PhraseLengthFeatureTest.cpp|PhraseLengthFeatureTest.cpp|LM/BackwardTest.cpp|LM/BackwardLMState.h|LM/BackwardLMState.cpp|LM/Backward.h|LM/Backward.cpp|FeatureVectorTest.cpp|LM/ParallelBackoff.h|LM/ParallelBackoff.cpp|src/SyntacticLanguageModelState.h|src/SyntacticLanguageModelFiles.h|src/SyntacticLanguageModel.h|src/SyntacticLanguageModel.cpp|src/LM/SRI.h|src/LM/SRI.cpp|src/LM/Rand.h|src/LM/Rand.cpp|src/LM/LDHT.h|src/LM/LDHT.cpp|SyntacticLanguageModelState.h|SyntacticLanguageModelFiles.h|SyntacticLanguageModel.h|SyntacticLanguageModel.cpp|LM/LDHT.h|LM/LDHT.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
+						<entry excluding="LM/Rand.h|LM/Rand.cpp|TranslationModel/CompactPT|LM/NeuralLMWrapper.cpp|FF/PhraseLengthFeatureTest.cpp|PhraseLengthFeatureTest.cpp|LM/BackwardTest.cpp|LM/BackwardLMState.h|LM/BackwardLMState.cpp|LM/Backward.h|LM/Backward.cpp|FeatureVectorTest.cpp|LM/ParallelBackoff.h|LM/ParallelBackoff.cpp|src/SyntacticLanguageModelState.h|src/SyntacticLanguageModelFiles.h|src/SyntacticLanguageModel.h|src/SyntacticLanguageModel.cpp|src/LM/SRI.h|src/LM/SRI.cpp|src/LM/Rand.h|src/LM/Rand.cpp|src/LM/LDHT.h|src/LM/LDHT.cpp|SyntacticLanguageModelState.h|SyntacticLanguageModelFiles.h|SyntacticLanguageModel.h|SyntacticLanguageModel.cpp|LM/LDHT.h|LM/LDHT.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
 					</sourceEntries>
 				</configuration>
 			</storageModule>
diff --git a/contrib/other-builds/moses/.project b/contrib/other-builds/moses/.project
index 120323a085..ada019da00 100644
--- a/contrib/other-builds/moses/.project
+++ b/contrib/other-builds/moses/.project
@@ -1156,16 +1156,6 @@
 			<type>1</type>
 			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/InternalStructStatelessFF.h</locationURI>
 		</link>
-		<link>
-			<name>FF/LBLLM.cpp</name>
-			<type>1</type>
-			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/LBLLM.cpp</locationURI>
-		</link>
-		<link>
-			<name>FF/LBLLM.h</name>
-			<type>1</type>
-			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/LBLLM.h</locationURI>
-		</link>
 		<link>
 			<name>FF/LexicalReordering</name>
 			<type>2</type>
@@ -1536,6 +1526,16 @@
 			<type>1</type>
 			<locationURI>PARENT-3-PROJECT_LOC/moses/LM/Ken.h</locationURI>
 		</link>
+		<link>
+			<name>LM/LBLLM.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/LM/LBLLM.cpp</locationURI>
+		</link>
+		<link>
+			<name>LM/LBLLM.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/LM/LBLLM.h</locationURI>
+		</link>
 		<link>
 			<name>LM/LDHT.cpp</name>
 			<type>1</type>
diff --git a/moses/FF/Factory.cpp b/moses/FF/Factory.cpp
index 38fbccc890..d4021fd894 100644
--- a/moses/FF/Factory.cpp
+++ b/moses/FF/Factory.cpp
@@ -48,7 +48,6 @@
 #include "NieceTerminal.h"
 #include "SpanLength.h"
 #include "SyntaxRHS.h"
-#include "LBLLM.h"
 
 #include "moses/FF/SkeletonStatelessFF.h"
 #include "moses/FF/SkeletonStatefulFF.h"
@@ -94,6 +93,10 @@
 #include "moses/LM/DALMWrapper.h"
 #endif
 
+#ifdef LM_LBLLM
+#include "moses/LM/LBLLM.h"
+#endif
+
 #include "util/exception.hh"
 
 #include <vector>
@@ -205,7 +208,6 @@ FeatureRegistry::FeatureRegistry()
   MOSES_FNAME(SparseHieroReorderingFeature);
   MOSES_FNAME(SpanLength);
   MOSES_FNAME(SyntaxRHS);
-  MOSES_FNAME(LBLLM);
 
   MOSES_FNAME(SkeletonStatelessFF);
   MOSES_FNAME(SkeletonStatefulFF);
@@ -243,6 +245,9 @@ FeatureRegistry::FeatureRegistry()
 #ifdef LM_DALM
   MOSES_FNAME2("DALM", LanguageModelDALM);
 #endif
+#ifdef LM_LBLLM
+  MOSES_FNAME(LBLLM);
+#endif
 
   Add("KENLM", new KenFactory());
 }
diff --git a/moses/FF/LBLLM.cpp b/moses/LM/LBLLM.cpp
similarity index 70%
rename from moses/FF/LBLLM.cpp
rename to moses/LM/LBLLM.cpp
index c57d19acc0..8ee425a0e7 100644
--- a/moses/FF/LBLLM.cpp
+++ b/moses/LM/LBLLM.cpp
@@ -19,7 +19,8 @@ int LBLLMState::Compare(const FFState& other) const
 
 ////////////////////////////////////////////////////////////////
 LBLLM::LBLLM(const std::string &line)
-  :StatefulFeatureFunction(3, line)
+:StatefulFeatureFunction(3, line)
+,lm(ModelData(), oxlm::Dict(), true)
 {
   ReadParameters();
 }
@@ -38,38 +39,6 @@ void LBLLM::Load()
 	  ia >> lm;
 	  dict = lm.label_set();
 	}
-    /*
-    {
-      ifstream z_ifile((lm_file+".z").c_str(), ios::in);
-      if (!z_ifile.good()) {
-        cerr << "Failed to open " << (lm_file+".z") << " for reading\n";
-        abort();
-      }
-      cerr << "Reading LM Z from " << lm_file+".z" << " ...\n";
-      boost::archive::text_iarchive ia(z_ifile);
-      ia >> z_approx;
-    }
-    */
-
-    cerr << "Initializing map contents (map size=" << dict.max() << ")\n";
-    for (int i = 1; i < dict.max(); ++i)
-      AddToWordMap(i);
-    cerr << "Done.\n";
-    ss_off = OrderToStateSize(kORDER)-1;  // offset of "state size" member
-    FeatureFunction::SetStateSize(OrderToStateSize(kORDER));
-    kSTART = dict.Convert("<s>");
-    kSTOP = dict.Convert("</s>");
-    kUNKNOWN = dict.Convert("_UNK_");
-    kNONE = -1;
-    kSTAR = dict.Convert("<{STAR}>");
-    last_id = 0;
-
-    // optional online "adaptation" by training on previous references
-    if (reffile.size()) {
-      cerr << "Reference file: " << reffile << endl;
-      set<WordID> rv;
-      oxlm::ReadFromFile(reffile, &dict, &ref_sents, &rv);
-    }
 
 }
 
diff --git a/moses/FF/LBLLM.h b/moses/LM/LBLLM.h
similarity index 93%
rename from moses/FF/LBLLM.h
rename to moses/LM/LBLLM.h
index 3a667a0f87..fa087c4ba9 100644
--- a/moses/FF/LBLLM.h
+++ b/moses/LM/LBLLM.h
@@ -1,8 +1,11 @@
 #pragma once
 
 #include <string>
-#include "StatefulFeatureFunction.h"
-#include "FFState.h"
+#include "moses/FF/StatefulFeatureFunction.h"
+#include "moses/FF/FFState.h"
+
+// pyp stuff
+#include "corpus/corpus.h"
 #include "lbl/nlm.h"
 
 namespace Moses

From 3c9e397132691a101ea9e919782c93056babb3c0 Mon Sep 17 00:00:00 2001
From: Hieu Hoang <hieuhoang@gmail.com>
Date: Thu, 10 Jul 2014 16:14:22 +0100
Subject: [PATCH 42/84] compiles with eclipse

---
 .../other-builds/moses-chart-cmd/.cproject    |  4 +
 contrib/other-builds/moses/.cproject          |  3 +-
 moses/FF/Factory.cpp                          | 16 +++-
 moses/LM/Jamfile                              | 10 +++
 moses/LM/LBLLM.cpp                            | 76 -------------------
 moses/LM/LBLLM.h                              | 71 ++++++++++++++---
 6 files changed, 89 insertions(+), 91 deletions(-)

diff --git a/contrib/other-builds/moses-chart-cmd/.cproject b/contrib/other-builds/moses-chart-cmd/.cproject
index 0d720dbc27..848329d58e 100644
--- a/contrib/other-builds/moses-chart-cmd/.cproject
+++ b/contrib/other-builds/moses-chart-cmd/.cproject
@@ -48,6 +48,7 @@
 							<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.816413868" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug">
 								<option id="gnu.cpp.link.option.paths.330225535" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../nplm/lib&quot;"/>
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../oxlm/lib&quot;"/>
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../probingPT/helpers&quot;"/>
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../DALM/lib&quot;"/>
@@ -68,6 +69,9 @@
 								</option>
 								<option id="gnu.cpp.link.option.libs.1177721357" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
 									<listOptionValue builtIn="false" value="moses"/>
+									<listOptionValue builtIn="false" value="lbl"/>
+									<listOptionValue builtIn="false" value="murmurhash"/>
+									<listOptionValue builtIn="false" value="gomp"/>
 									<listOptionValue builtIn="false" value="irstlm"/>
 									<listOptionValue builtIn="false" value="dstruct"/>
 									<listOptionValue builtIn="false" value="dalm"/>
diff --git a/contrib/other-builds/moses/.cproject b/contrib/other-builds/moses/.cproject
index 1a06fe7a8b..e0d9928b21 100644
--- a/contrib/other-builds/moses/.cproject
+++ b/contrib/other-builds/moses/.cproject
@@ -56,6 +56,7 @@
 									<listOptionValue builtIn="false" value="LM_IRST"/>
 									<listOptionValue builtIn="false" value="LM_DALM"/>
 									<listOptionValue builtIn="false" value="LM_NPLM"/>
+									<listOptionValue builtIn="false" value="LM_LBL"/>
 									<listOptionValue builtIn="false" value="_FILE_OFFSET_BIT=64"/>
 									<listOptionValue builtIn="false" value="_LARGE_FILES"/>
 								</option>
@@ -80,8 +81,8 @@
 						</toolChain>
 					</folderInfo>
 					<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.511477442" name="Rand.h" rcbsApplicability="disable" resourcePath="LM/Rand.h" toolsToInvoke=""/>
-					<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.790052015" name="IRST.h" rcbsApplicability="disable" resourcePath="LM/IRST.h" toolsToInvoke=""/>
 					<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.1211280539" name="DALMWrapper.h" rcbsApplicability="disable" resourcePath="LM/DALMWrapper.h" toolsToInvoke=""/>
+					<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.790052015" name="IRST.h" rcbsApplicability="disable" resourcePath="LM/IRST.h" toolsToInvoke=""/>
 					<sourceEntries>
 						<entry excluding="LM/Rand.h|LM/Rand.cpp|TranslationModel/CompactPT|LM/NeuralLMWrapper.cpp|FF/PhraseLengthFeatureTest.cpp|PhraseLengthFeatureTest.cpp|LM/BackwardTest.cpp|LM/BackwardLMState.h|LM/BackwardLMState.cpp|LM/Backward.h|LM/Backward.cpp|FeatureVectorTest.cpp|LM/ParallelBackoff.h|LM/ParallelBackoff.cpp|src/SyntacticLanguageModelState.h|src/SyntacticLanguageModelFiles.h|src/SyntacticLanguageModel.h|src/SyntacticLanguageModel.cpp|src/LM/SRI.h|src/LM/SRI.cpp|src/LM/Rand.h|src/LM/Rand.cpp|src/LM/LDHT.h|src/LM/LDHT.cpp|SyntacticLanguageModelState.h|SyntacticLanguageModelFiles.h|SyntacticLanguageModel.h|SyntacticLanguageModel.cpp|LM/LDHT.h|LM/LDHT.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
 					</sourceEntries>
diff --git a/moses/FF/Factory.cpp b/moses/FF/Factory.cpp
index d4021fd894..21490cc2ef 100644
--- a/moses/FF/Factory.cpp
+++ b/moses/FF/Factory.cpp
@@ -93,8 +93,16 @@
 #include "moses/LM/DALMWrapper.h"
 #endif
 
-#ifdef LM_LBLLM
+#ifdef LM_LBL
 #include "moses/LM/LBLLM.h"
+#include "corpus/corpus.h"
+#include "lbl/cdec_lbl_mapper.h"
+#include "lbl/cdec_rule_converter.h"
+#include "lbl/cdec_state_converter.h"
+#include "lbl/lbl_features.h"
+#include "lbl/model.h"
+#include "lbl/process_identifier.h"
+#include "lbl/query_cache.h"
 #endif
 
 #include "util/exception.hh"
@@ -245,8 +253,10 @@ FeatureRegistry::FeatureRegistry()
 #ifdef LM_DALM
   MOSES_FNAME2("DALM", LanguageModelDALM);
 #endif
-#ifdef LM_LBLLM
-  MOSES_FNAME(LBLLM);
+#ifdef LM_LBL
+  MOSES_FNAME2("LBLLM-LM", LBLLM<oxlm::LM>);
+  MOSES_FNAME2("LBLLM-FactoredLM", LBLLM<oxlm::FactoredLM>);
+  MOSES_FNAME2("LBLLM-FactoredMaxentLM", LBLLM<oxlm::FactoredMaxentLM>);
 #endif
 
   Add("KENLM", new KenFactory());
diff --git a/moses/LM/Jamfile b/moses/LM/Jamfile
index 4f964ddd83..dd351223bd 100644
--- a/moses/LM/Jamfile
+++ b/moses/LM/Jamfile
@@ -90,6 +90,16 @@ if $(with-nplm) {
   lmmacros += LM_NEURAL ;
 }
 
+#LBLLM
+local with-lbllm = [ option.get "with-lbllm" ] ;
+if $(with-lbllm) {
+  lib lblLM : : <search>$(with-lbllm)/lib <search>$(with-lbllm)/lib64 ;
+  obj LBLLM.o : LBLLM.cpp neuralLM ..//headers : <include>$(with-lbllm)/src <include>$(with-lbllm)/3rdparty/eigen-3 ;
+  alias lbllm : LBLLM.o lblLM : : : <cxxflags>-std=c++0x <linkflags>-std=c++0x <define>LM_LBL ;
+  dependencies += lbllm ;
+  lmmacros += LM_LBL ;
+}
+
 #DALM
 local with-dalm = [ option.get "with-dalm" ] ;
 if $(with-dalm) {
diff --git a/moses/LM/LBLLM.cpp b/moses/LM/LBLLM.cpp
index 8ee425a0e7..823ace339e 100644
--- a/moses/LM/LBLLM.cpp
+++ b/moses/LM/LBLLM.cpp
@@ -18,83 +18,7 @@ int LBLLMState::Compare(const FFState& other) const
 }
 
 ////////////////////////////////////////////////////////////////
-LBLLM::LBLLM(const std::string &line)
-:StatefulFeatureFunction(3, line)
-,lm(ModelData(), oxlm::Dict(), true)
-{
-  ReadParameters();
-}
-
-void LBLLM::Load()
-{
-	{
-	  cerr << "Reading LM from " << m_lmPath << " ...\n";
-	  //ifstream ifile(lm_file.c_str(), ios::in | ios::binary);
-	  ifstream ifile(m_lmPath.c_str(), ios::in);
-	  if (!ifile.good()) {
-	    cerr << "Failed to open " << m_lmPath << " for reading\n";
-	    abort();
-	  }
-	  boost::archive::text_iarchive ia(ifile);
-	  ia >> lm;
-	  dict = lm.label_set();
-	}
-
-}
-
-void LBLLM::EvaluateInIsolation(const Phrase &source
-                                  , const TargetPhrase &targetPhrase
-                                  , ScoreComponentCollection &scoreBreakdown
-                                  , ScoreComponentCollection &estimatedFutureScore) const
-{}
-
-void LBLLM::EvaluateWithSourceContext(const InputType &input
-                                  , const InputPath &inputPath
-                                  , const TargetPhrase &targetPhrase
-                                  , const StackVec *stackVec
-                                  , ScoreComponentCollection &scoreBreakdown
-                                  , ScoreComponentCollection *estimatedFutureScore) const
-{}
-
-FFState* LBLLM::EvaluateWhenApplied(
-  const Hypothesis& cur_hypo,
-  const FFState* prev_state,
-  ScoreComponentCollection* accumulator) const
-{
-  // dense scores
-  vector<float> newScores(m_numScoreComponents);
-  newScores[0] = 1.5;
-  newScores[1] = 0.3;
-  newScores[2] = 0.4;
-  accumulator->PlusEquals(this, newScores);
-
-  // sparse scores
-  accumulator->PlusEquals(this, "sparse-name", 2.4);
 
-  // int targetLen = cur_hypo.GetCurrTargetPhrase().GetSize(); // ??? [UG]
-  return new LBLLMState(0);
-}
-
-FFState* LBLLM::EvaluateWhenApplied(
-  const ChartHypothesis& /* cur_hypo */,
-  int /* featureID - used to index the state in the previous hypotheses */,
-  ScoreComponentCollection* accumulator) const
-{
-  return new LBLLMState(0);
-}
-
-void LBLLM::SetParameter(const std::string& key, const std::string& value)
-{
-  if (key == "lm-file") {
-	  m_lmPath = value;
-  }
-  else if (key == "ref-file") {
-	  m_refPath = value;
-  }
-  else {
-    StatefulFeatureFunction::SetParameter(key, value);
-  }
-}
 
 }
 
diff --git a/moses/LM/LBLLM.h b/moses/LM/LBLLM.h
index fa087c4ba9..e244460610 100644
--- a/moses/LM/LBLLM.h
+++ b/moses/LM/LBLLM.h
@@ -1,12 +1,20 @@
 #pragma once
 
 #include <string>
+#include <boost/shared_ptr.hpp>
 #include "moses/FF/StatefulFeatureFunction.h"
 #include "moses/FF/FFState.h"
 
-// pyp stuff
+// lbl stuff
 #include "corpus/corpus.h"
-#include "lbl/nlm.h"
+#include "lbl/lbl_features.h"
+#include "lbl/model.h"
+#include "lbl/process_identifier.h"
+#include "lbl/query_cache.h"
+
+#include "lbl/cdec_lbl_mapper.h"
+#include "lbl/cdec_rule_converter.h"
+#include "lbl/cdec_state_converter.h"
 
 namespace Moses
 {
@@ -21,11 +29,22 @@ class LBLLMState : public FFState
   int Compare(const FFState& other) const;
 };
 
+template<class Model>
 class LBLLM : public StatefulFeatureFunction
 {
 public:
-  LBLLM(const std::string &line);
-  void Load();
+	LBLLM(const std::string &line)
+	:StatefulFeatureFunction(3, line)
+	{
+	  ReadParameters();
+	}
+
+  void Load()
+  {
+    model.load(m_path);
+
+
+  }
 
   bool IsUseable(const FactorMask &mask) const {
     return true;
@@ -37,29 +56,59 @@ class LBLLM : public StatefulFeatureFunction
   void EvaluateInIsolation(const Phrase &source
                 , const TargetPhrase &targetPhrase
                 , ScoreComponentCollection &scoreBreakdown
-                , ScoreComponentCollection &estimatedFutureScore) const;
+                , ScoreComponentCollection &estimatedFutureScore) const
+  {
+
+  }
+
   void EvaluateWithSourceContext(const InputType &input
                 , const InputPath &inputPath
                 , const TargetPhrase &targetPhrase
                 , const StackVec *stackVec
                 , ScoreComponentCollection &scoreBreakdown
-                , ScoreComponentCollection *estimatedFutureScore = NULL) const;
+                , ScoreComponentCollection *estimatedFutureScore = NULL) const
+  {
+
+  }
+
   FFState* EvaluateWhenApplied(
     const Hypothesis& cur_hypo,
     const FFState* prev_state,
-    ScoreComponentCollection* accumulator) const;
+    ScoreComponentCollection* accumulator) const
+  {
+
+  }
+
   FFState* EvaluateWhenApplied(
     const ChartHypothesis& /* cur_hypo */,
     int /* featureID - used to index the state in the previous hypotheses */,
-    ScoreComponentCollection* accumulator) const;
+    ScoreComponentCollection* accumulator) const
+  {
+
+  }
+
+  void SetParameter(const std::string& key, const std::string& value)
+  {
+    if (key == "path") {
+  	  m_path = value;
+    }
+    else {
+      StatefulFeatureFunction::SetParameter(key, value);
+    }
+  }
 
-  void SetParameter(const std::string& key, const std::string& value);
 
 protected:
-  std::string m_lmPath, m_refPath;
+  std::string m_path;
 
+  int fid;
+  int fidOOV;
   oxlm::Dict dict;
-  oxlm::FactoredOutputNLM lm;
+  boost::shared_ptr<oxlm::ModelData> config;
+  Model model;
+  boost::shared_ptr<oxlm::CdecLBLMapper> mapper;
+  boost::shared_ptr<oxlm::CdecRuleConverter> ruleConverter;
+  boost::shared_ptr<oxlm::CdecStateConverter> stateConverter;
 
 };
 

From a209d0ed184e74117f3aaf2c9261e473cae98457 Mon Sep 17 00:00:00 2001
From: Hieu Hoang <hieuhoang@gmail.com>
Date: Thu, 10 Jul 2014 18:48:51 +0100
Subject: [PATCH 43/84] compiles with eclipse

---
 contrib/other-builds/moses/.project | 15 ++++++++++++
 moses/LM/LBLLM.h                    | 36 ++++++++++++++++++++++++----
 moses/LM/oxlm/Mapper.cpp            | 37 +++++++++++++++++++++++++++++
 moses/LM/oxlm/Mapper.h              | 26 ++++++++++++++++++++
 4 files changed, 109 insertions(+), 5 deletions(-)
 create mode 100644 moses/LM/oxlm/Mapper.cpp
 create mode 100644 moses/LM/oxlm/Mapper.h

diff --git a/contrib/other-builds/moses/.project b/contrib/other-builds/moses/.project
index ada019da00..1041bf24e9 100644
--- a/contrib/other-builds/moses/.project
+++ b/contrib/other-builds/moses/.project
@@ -1646,6 +1646,11 @@
 			<type>1</type>
 			<locationURI>PARENT-3-PROJECT_LOC/moses/LM/backward.arpa</locationURI>
 		</link>
+		<link>
+			<name>LM/oxlm</name>
+			<type>2</type>
+			<locationURI>virtual:/virtual</locationURI>
+		</link>
 		<link>
 			<name>PP/CountsPhraseProperty.cpp</name>
 			<type>1</type>
@@ -1966,6 +1971,16 @@
 			<type>1</type>
 			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/OSM-Feature/osmHyp.h</locationURI>
 		</link>
+		<link>
+			<name>LM/oxlm/Mapper.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/LM/oxlm/Mapper.cpp</locationURI>
+		</link>
+		<link>
+			<name>LM/oxlm/Mapper.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/LM/oxlm/Mapper.h</locationURI>
+		</link>
 		<link>
 			<name>TranslationModel/CYKPlusParser/ChartRuleLookupManagerCYKPlus.cpp</name>
 			<type>1</type>
diff --git a/moses/LM/LBLLM.h b/moses/LM/LBLLM.h
index e244460610..3adabc23d6 100644
--- a/moses/LM/LBLLM.h
+++ b/moses/LM/LBLLM.h
@@ -16,6 +16,8 @@
 #include "lbl/cdec_rule_converter.h"
 #include "lbl/cdec_state_converter.h"
 
+#include "oxlm/Mapper.h"
+
 namespace Moses
 {
 
@@ -34,7 +36,7 @@ class LBLLM : public StatefulFeatureFunction
 {
 public:
 	LBLLM(const std::string &line)
-	:StatefulFeatureFunction(3, line)
+	:StatefulFeatureFunction(2, line)
 	{
 	  ReadParameters();
 	}
@@ -43,7 +45,23 @@ class LBLLM : public StatefulFeatureFunction
   {
     model.load(m_path);
 
-
+    config = model.getConfig();
+    int context_width = config->ngram_order - 1;
+    // For each state, we store at most context_width word ids to the left and
+    // to the right and a kSTAR separator. The last bit represents the actual
+    // size of the state.
+    //int max_state_size = (2 * context_width + 1) * sizeof(int) + 1;
+    //FeatureFunction::SetStateSize(max_state_size);
+
+    dict = model.getDict();
+    mapper = boost::make_shared<OXLMMapper>(dict);
+    //stateConverter = boost::make_shared<CdecStateConverter>(max_state_size - 1);
+    //ruleConverter = boost::make_shared<CdecRuleConverter>(mapper, stateConverter);
+
+    kSTART = dict.Convert("<s>");
+    kSTOP = dict.Convert("</s>");
+    kUNKNOWN = dict.Convert("<unk>");
+    kSTAR = dict.Convert("<{STAR}>");
   }
 
   bool IsUseable(const FactorMask &mask) const {
@@ -80,8 +98,8 @@ class LBLLM : public StatefulFeatureFunction
   }
 
   FFState* EvaluateWhenApplied(
-    const ChartHypothesis& /* cur_hypo */,
-    int /* featureID - used to index the state in the previous hypotheses */,
+    const ChartHypothesis &cur_hypo,
+    int featureID,
     ScoreComponentCollection* accumulator) const
   {
 
@@ -106,9 +124,17 @@ class LBLLM : public StatefulFeatureFunction
   oxlm::Dict dict;
   boost::shared_ptr<oxlm::ModelData> config;
   Model model;
-  boost::shared_ptr<oxlm::CdecLBLMapper> mapper;
+
+  boost::shared_ptr<OXLMMapper> mapper;
+  /*
   boost::shared_ptr<oxlm::CdecRuleConverter> ruleConverter;
   boost::shared_ptr<oxlm::CdecStateConverter> stateConverter;
+  */
+
+  int kSTART;
+  int kSTOP;
+  int kUNKNOWN;
+  int kSTAR;
 
 };
 
diff --git a/moses/LM/oxlm/Mapper.cpp b/moses/LM/oxlm/Mapper.cpp
new file mode 100644
index 0000000000..43719e83ef
--- /dev/null
+++ b/moses/LM/oxlm/Mapper.cpp
@@ -0,0 +1,37 @@
+#include "Mapper.h"
+#include "moses/FactorCollection.h"
+
+using namespace std;
+
+namespace Moses
+{
+OXLMMapper::OXLMMapper(const oxlm::Dict& dict) : dict(dict)
+{
+  // Register every dictionary entry with the FactorCollection and record
+  // which LBL id the resulting factor corresponds to.
+  FactorCollection &factors = FactorCollection::Instance();
+  for (int lblId = 0; lblId < dict.size(); ++lblId) {
+    const string &surface = dict.Convert(lblId);
+    moses2lbl[factors.AddFactor(surface, false)] = lblId;
+  }
+
+  // Id handed back by convert() for factors missing from moses2lbl.
+  kUNKNOWN = this->dict.Convert("<unk>");
+}
+
+int OXLMMapper::convert(const Moses::Factor *factor) const
+{
+	Coll::const_iterator iter;
+	iter = moses2lbl.find(factor);
+	if (iter == moses2lbl.end()) {
+		return kUNKNOWN;
+	}
+	else {
+		int ret = iter->second;
+		return ret;
+	}
+}
+
+
+} // namespace
+
diff --git a/moses/LM/oxlm/Mapper.h b/moses/LM/oxlm/Mapper.h
new file mode 100644
index 0000000000..92fbc26fda
--- /dev/null
+++ b/moses/LM/oxlm/Mapper.h
@@ -0,0 +1,26 @@
+#pragma once
+
+#include <map>
+#include "corpus/corpus.h"
+#include "moses/Factor.h"
+
+namespace Moses
+{
+class OXLMMapper
+{
+public:
+ OXLMMapper(const oxlm::Dict& dict);
+
+ int convert(const Moses::Factor *factor) const;
+
+private:
+ void add(int lbl_id, int cdec_id);
+
+ oxlm::Dict dict;
+ typedef std::map<const Moses::Factor*, int> Coll;
+ Coll moses2lbl;
+ int kUNKNOWN;
+
+};
+
+}

From da825e844ba446641672d726d4ba9118d7c5b458 Mon Sep 17 00:00:00 2001
From: Hieu Hoang <hieuhoang@gmail.com>
Date: Fri, 11 Jul 2014 11:15:06 +0100
Subject: [PATCH 44/84] 1st rough sketch. compiles with eclipse

---
 moses/ChartHypothesis.cpp | 32 ++++++++++++++++
 moses/ChartHypothesis.h   |  4 ++
 moses/LM/LBLLM.cpp        | 10 +++--
 moses/LM/LBLLM.h          | 80 ++++++++++++++++++++++++++++++++++++++-
 moses/LM/oxlm/Mapper.cpp  | 12 ++++++
 moses/LM/oxlm/Mapper.h    |  2 +
 6 files changed, 135 insertions(+), 5 deletions(-)

diff --git a/moses/ChartHypothesis.cpp b/moses/ChartHypothesis.cpp
index 2bcc480e77..dec3bd308e 100644
--- a/moses/ChartHypothesis.cpp
+++ b/moses/ChartHypothesis.cpp
@@ -149,6 +149,38 @@ Phrase ChartHypothesis::GetOutputPhrase() const
   return outPhrase;
 }
 
+void ChartHypothesis::GetOutputPhrase(int leftRightMost, int numWords, Phrase &outPhrase) const
+{
+  int targetSize = GetCurrTargetPhrase().GetSize();
+  for (int i = 0; i < targetSize; ++i) {
+	int pos;
+	if (leftRightMost == 1) {
+	  pos = i;
+	}
+	else if (leftRightMost == 2) {
+	  pos = targetSize - i;
+	}
+	else {
+		abort();
+	}
+
+	const Word &word = GetCurrTargetPhrase().GetWord(pos);
+
+	if (word.IsNonTerminal()) {
+	  // non-term. fill out with prev hypo
+	  size_t nonTermInd = GetCurrTargetPhrase().GetAlignNonTerm().GetNonTermIndexMap()[pos];
+	  const ChartHypothesis *prevHypo = m_prevHypos[nonTermInd];
+	  prevHypo->GetOutputPhrase(outPhrase);
+	} else {
+	  outPhrase.AddWord(word);
+	}
+
+	if (outPhrase.GetSize() >= numWords) {
+		return;
+	}
+  }
+}
+
 /** check, if two hypothesis can be recombined.
     this is actually a sorting function that allows us to
     keep an ordered list of hypotheses. This makes recombination
diff --git a/moses/ChartHypothesis.h b/moses/ChartHypothesis.h
index 12050e7647..3f159d2224 100644
--- a/moses/ChartHypothesis.h
+++ b/moses/ChartHypothesis.h
@@ -138,6 +138,10 @@ class ChartHypothesis
   void GetOutputPhrase(Phrase &outPhrase) const;
   Phrase GetOutputPhrase() const;
 
+  // get leftmost/rightmost words only
+  // leftRightMost: 1=left, 2=right
+  void GetOutputPhrase(int leftRightMost, int numWords, Phrase &outPhrase) const;
+
   int RecombineCompare(const ChartHypothesis &compare) const;
 
   void Evaluate();
diff --git a/moses/LM/LBLLM.cpp b/moses/LM/LBLLM.cpp
index 823ace339e..bfa7635b83 100644
--- a/moses/LM/LBLLM.cpp
+++ b/moses/LM/LBLLM.cpp
@@ -12,9 +12,13 @@ int LBLLMState::Compare(const FFState& other) const
 {
   const LBLLMState &otherState = static_cast<const LBLLMState&>(other);
 
-  if (m_targetLen == otherState.m_targetLen)
-    return 0;
-  return (m_targetLen < otherState.m_targetLen) ? -1 : +1;
+  if (m_left != otherState.m_left) {
+	  return (m_left < otherState.m_left) ? -1 : +1;
+  }
+  else if (m_right != otherState.m_right) {
+	  return (m_right < otherState.m_right) ? -1 : +1;
+  }
+  return 0;
 }
 
 ////////////////////////////////////////////////////////////////
diff --git a/moses/LM/LBLLM.h b/moses/LM/LBLLM.h
index 3adabc23d6..6c1149f4a8 100644
--- a/moses/LM/LBLLM.h
+++ b/moses/LM/LBLLM.h
@@ -4,6 +4,8 @@
 #include <boost/shared_ptr.hpp>
 #include "moses/FF/StatefulFeatureFunction.h"
 #include "moses/FF/FFState.h"
+#include "moses/Util.h"
+#include "moses/ChartHypothesis.h"
 
 // lbl stuff
 #include "corpus/corpus.h"
@@ -23,7 +25,7 @@ namespace Moses
 
 class LBLLMState : public FFState
 {
-  int m_targetLen;
+  std::vector<int> m_left, m_right;
 public:
   LBLLMState(int targetLen)
   {}
@@ -31,12 +33,31 @@ class LBLLMState : public FFState
   int Compare(const FFState& other) const;
 };
 
+/**
+ * Pair of feature values derived from the LBL language model: LMScore and
+ * OOVScore. Values from several scoring calls are combined with operator+=.
+ */
+struct LBLFeatures {
+  double LMScore;
+  double OOVScore;
+  LBLFeatures() : LMScore(0), OOVScore(0) {}
+  LBLFeatures(double lm_score, double oov_score)
+      : LMScore(lm_score), OOVScore(oov_score) {}
+  LBLFeatures& operator+=(const LBLFeatures& other) {
+    LMScore += other.LMScore;
+    OOVScore += other.OOVScore;
+    return *this;
+  }
+};
+
+// FF class
 template<class Model>
 class LBLLM : public StatefulFeatureFunction
 {
 public:
 	LBLLM(const std::string &line)
 	:StatefulFeatureFunction(2, line)
+	,m_order(5)
 	{
 	  ReadParameters();
 	}
@@ -98,11 +119,26 @@ class LBLLM : public StatefulFeatureFunction
   }
 
   FFState* EvaluateWhenApplied(
-    const ChartHypothesis &cur_hypo,
+    const ChartHypothesis &hypo,
     int featureID,
     ScoreComponentCollection* accumulator) const
   {
+	  std::vector<int> leftIds, rightIds;
+	  Phrase leftPhrase, rightPhrase;
+	  hypo.GetOutputPhrase(1, m_order, leftPhrase);
+	  hypo.GetOutputPhrase(2, m_order, rightPhrase);
+
+	  leftIds = mapper->convert(leftPhrase);
+	  rightIds = mapper->convert(rightPhrase);
 
+	  LBLFeatures leftScores = scoreFullContexts(leftIds);
+	  LBLFeatures rightScores = scoreFullContexts(rightIds);
+
+	  std::vector<float> scores(2);
+	  scores[0] = leftScores.LMScore + rightScores.LMScore;
+	  scores[1] = leftScores.OOVScore + rightScores.OOVScore;
+
+	  accumulator->PlusEquals(this, scores);
   }
 
   void SetParameter(const std::string& key, const std::string& value)
@@ -110,6 +146,9 @@ class LBLLM : public StatefulFeatureFunction
     if (key == "path") {
   	  m_path = value;
     }
+    else if (key == "order") {
+      m_order = Scan<int>(value);
+    }
     else {
       StatefulFeatureFunction::SetParameter(key, value);
     }
@@ -118,6 +157,7 @@ class LBLLM : public StatefulFeatureFunction
 
 protected:
   std::string m_path;
+  int m_order;
 
   int fid;
   int fidOOV;
@@ -136,6 +176,42 @@ class LBLLM : public StatefulFeatureFunction
   int kUNKNOWN;
   int kSTAR;
 
+  // Sums scoreContext() over every non-kSTAR symbol that is preceded by at
+  // least context_width symbols since the last kSTAR (or the vector start).
+  LBLFeatures scoreFullContexts(const std::vector<int>& symbols) const {
+    LBLFeatures ret;
+    int last_star = -1;
+    const int context_width = config->ngram_order - 1;
+    for (int i = 0; i < static_cast<int>(symbols.size()); ++i) {
+      if (symbols[i] == kSTAR) {
+        last_star = i;
+      } else if (i - last_star > context_width) {
+        ret += scoreContext(symbols, i);
+      }
+    }
+    return ret;
+  }
+
+  // Scores symbols[position] given up to context_width preceding symbols
+  // (nearest first); a short history is padded to the full width.
+  LBLFeatures scoreContext(const std::vector<int>& symbols, int position) const {
+    int word = symbols[position];
+    const int context_width = config->ngram_order - 1;
+    std::vector<int> context;
+    for (int i = 1; i <= context_width && position - i >= 0; ++i) {
+      assert(symbols[position - i] != kSTAR);
+      context.push_back(symbols[position - i]);
+    }
+
+    // Pad with kSTART when the collected history ends at kSTART, else kUNKNOWN.
+    const bool endsAtStart = !context.empty() && context.back() == kSTART;
+    context.resize(context_width, endsAtStart ? kSTART : kUNKNOWN);
+
+    // The OOV feature is 1 when the scored word is kUNKNOWN, otherwise 0.
+    const double score = model.predict(word, context);
+    return LBLFeatures(score, word == kUNKNOWN);
+  }
+
 };
 
 
diff --git a/moses/LM/oxlm/Mapper.cpp b/moses/LM/oxlm/Mapper.cpp
index 43719e83ef..09407816d3 100644
--- a/moses/LM/oxlm/Mapper.cpp
+++ b/moses/LM/oxlm/Mapper.cpp
@@ -32,6 +32,18 @@ int OXLMMapper::convert(const Moses::Factor *factor) const
 	}
 }
 
+std::vector<int> OXLMMapper::convert(const Phrase &phrase) const
+{
+	// Translate factor 0 of each word in the phrase into its LBL id,
+	// keeping the phrase's word order.
+	const size_t numWords = phrase.GetSize();
+	vector<int> ids;
+	ids.reserve(numWords);
+	for (size_t pos = 0; pos < numWords; ++pos) {
+		ids.push_back(convert(phrase.GetFactor(pos, 0)));
+	}
+	return ids;
+}
 
 } // namespace
 
diff --git a/moses/LM/oxlm/Mapper.h b/moses/LM/oxlm/Mapper.h
index 92fbc26fda..b77bbff120 100644
--- a/moses/LM/oxlm/Mapper.h
+++ b/moses/LM/oxlm/Mapper.h
@@ -3,6 +3,7 @@
 #include <map>
 #include "corpus/corpus.h"
 #include "moses/Factor.h"
+#include "moses/Phrase.h"
 
 namespace Moses
 {
@@ -12,6 +13,7 @@ class OXLMMapper
  OXLMMapper(const oxlm::Dict& dict);
 
  int convert(const Moses::Factor *factor) const;
+ std::vector<int> convert(const Phrase &phrase) const;
 
 private:
  void add(int lbl_id, int cdec_id);

From 8f5dc2b9e7949af747b22b85714265e8f7ee481c Mon Sep 17 00:00:00 2001
From: Hieu Hoang <hieuhoang@gmail.com>
Date: Fri, 11 Jul 2014 12:53:03 +0100
Subject: [PATCH 45/84] state info

---
 moses/FF/Factory.cpp |  8 --------
 moses/LM/Jamfile     |  2 +-
 moses/LM/LBLLM.h     | 12 ++++++++++--
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/moses/FF/Factory.cpp b/moses/FF/Factory.cpp
index 21490cc2ef..eedf90e802 100644
--- a/moses/FF/Factory.cpp
+++ b/moses/FF/Factory.cpp
@@ -95,14 +95,6 @@
 
 #ifdef LM_LBL
 #include "moses/LM/LBLLM.h"
-#include "corpus/corpus.h"
-#include "lbl/cdec_lbl_mapper.h"
-#include "lbl/cdec_rule_converter.h"
-#include "lbl/cdec_state_converter.h"
-#include "lbl/lbl_features.h"
-#include "lbl/model.h"
-#include "lbl/process_identifier.h"
-#include "lbl/query_cache.h"
 #endif
 
 #include "util/exception.hh"
diff --git a/moses/LM/Jamfile b/moses/LM/Jamfile
index dd351223bd..33f85d7596 100644
--- a/moses/LM/Jamfile
+++ b/moses/LM/Jamfile
@@ -94,7 +94,7 @@ if $(with-nplm) {
 local with-lbllm = [ option.get "with-lbllm" ] ;
 if $(with-lbllm) {
   lib lblLM : : <search>$(with-lbllm)/lib <search>$(with-lbllm)/lib64 ;
-  obj LBLLM.o : LBLLM.cpp neuralLM ..//headers : <include>$(with-lbllm)/src <include>$(with-lbllm)/3rdparty/eigen-3 ;
+  obj LBLLM.o : LBLLM.cpp lblLM ..//headers : <include>$(with-lbllm)/src <include>$(with-lbllm)/3rdparty/eigen-3 ;
   alias lbllm : LBLLM.o lblLM : : : <cxxflags>-std=c++0x <linkflags>-std=c++0x <define>LM_LBL ;
   dependencies += lbllm ;
   lmmacros += LM_LBL ;
diff --git a/moses/LM/LBLLM.h b/moses/LM/LBLLM.h
index 6c1149f4a8..267fcb9c3f 100644
--- a/moses/LM/LBLLM.h
+++ b/moses/LM/LBLLM.h
@@ -27,7 +27,12 @@ class LBLLMState : public FFState
 {
   std::vector<int> m_left, m_right;
 public:
-  LBLLMState(int targetLen)
+  LBLLMState()
+  {}
+
+  LBLLMState(const std::vector<int> &left, const std::vector<int> &right)
+  :m_left(left)
+  ,m_right(right)
   {}
 
   int Compare(const FFState& other) const;
@@ -89,7 +94,7 @@ class LBLLM : public StatefulFeatureFunction
     return true;
   }
   virtual const FFState* EmptyHypothesisState(const InputType &input) const {
-    return new LBLLMState(0);
+    return new LBLLMState();
   }
 
   void EvaluateInIsolation(const Phrase &source
@@ -139,6 +144,9 @@ class LBLLM : public StatefulFeatureFunction
 	  scores[1] = leftScores.OOVScore + rightScores.OOVScore;
 
 	  accumulator->PlusEquals(this, scores);
+
+	  LBLLMState *state = new LBLLMState(leftIds, rightIds);
+	  return state;
   }
 
   void SetParameter(const std::string& key, const std::string& value)

From a402523ef57f09f72d5ab3b59967047de08d64b3 Mon Sep 17 00:00:00 2001
From: Hieu Hoang <hieuhoang@gmail.com>
Date: Fri, 11 Jul 2014 16:26:48 +0100
Subject: [PATCH 46/84] calculate baseline score without optimisation

---
 contrib/other-builds/moses/.project |  10 ++
 moses/ChartHypothesis.cpp           |  10 +-
 moses/LM/LBLLM.h                    |  21 +++++
 moses/PP/Factory.cpp                |   2 +
 moses/PP/NonTermContextProperty.cpp | 137 ++++++++++++++++++++++++++++
 moses/PP/NonTermContextProperty.h   |  73 +++++++++++++++
 moses/Word.cpp                      |   3 +-
 7 files changed, 250 insertions(+), 6 deletions(-)
 create mode 100644 moses/PP/NonTermContextProperty.cpp
 create mode 100644 moses/PP/NonTermContextProperty.h

diff --git a/contrib/other-builds/moses/.project b/contrib/other-builds/moses/.project
index 1041bf24e9..88818c1b13 100644
--- a/contrib/other-builds/moses/.project
+++ b/contrib/other-builds/moses/.project
@@ -1671,6 +1671,16 @@
 			<type>1</type>
 			<locationURI>PARENT-3-PROJECT_LOC/moses/PP/Factory.h</locationURI>
 		</link>
+		<link>
+			<name>PP/NonTermContextProperty.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/PP/NonTermContextProperty.cpp</locationURI>
+		</link>
+		<link>
+			<name>PP/NonTermContextProperty.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/PP/NonTermContextProperty.h</locationURI>
+		</link>
 		<link>
 			<name>PP/PhraseProperty.cpp</name>
 			<type>1</type>
diff --git a/moses/ChartHypothesis.cpp b/moses/ChartHypothesis.cpp
index dec3bd308e..7b32559b7e 100644
--- a/moses/ChartHypothesis.cpp
+++ b/moses/ChartHypothesis.cpp
@@ -151,24 +151,26 @@ Phrase ChartHypothesis::GetOutputPhrase() const
 
 void ChartHypothesis::GetOutputPhrase(int leftRightMost, int numWords, Phrase &outPhrase) const
 {
-  int targetSize = GetCurrTargetPhrase().GetSize();
+  const TargetPhrase &tp = GetCurrTargetPhrase();
+
+  int targetSize = tp.GetSize();
   for (int i = 0; i < targetSize; ++i) {
 	int pos;
 	if (leftRightMost == 1) {
 	  pos = i;
 	}
 	else if (leftRightMost == 2) {
-	  pos = targetSize - i;
+	  pos = targetSize - i - 1;
 	}
 	else {
 		abort();
 	}
 
-	const Word &word = GetCurrTargetPhrase().GetWord(pos);
+	const Word &word = tp.GetWord(pos);
 
 	if (word.IsNonTerminal()) {
 	  // non-term. fill out with prev hypo
-	  size_t nonTermInd = GetCurrTargetPhrase().GetAlignNonTerm().GetNonTermIndexMap()[pos];
+	  size_t nonTermInd = tp.GetAlignNonTerm().GetNonTermIndexMap()[pos];
 	  const ChartHypothesis *prevHypo = m_prevHypos[nonTermInd];
 	  prevHypo->GetOutputPhrase(outPhrase);
 	} else {
diff --git a/moses/LM/LBLLM.h b/moses/LM/LBLLM.h
index 267fcb9c3f..f1fc6eaafe 100644
--- a/moses/LM/LBLLM.h
+++ b/moses/LM/LBLLM.h
@@ -128,6 +128,7 @@ class LBLLM : public StatefulFeatureFunction
     int featureID,
     ScoreComponentCollection* accumulator) const
   {
+	  /*
 	  std::vector<int> leftIds, rightIds;
 	  Phrase leftPhrase, rightPhrase;
 	  hypo.GetOutputPhrase(1, m_order, leftPhrase);
@@ -147,6 +148,26 @@ class LBLLM : public StatefulFeatureFunction
 
 	  LBLLMState *state = new LBLLMState(leftIds, rightIds);
 	  return state;
+	*/
+
+	  // baseline non-optimized scoring
+	  Phrase phrase;
+	  hypo.GetOutputPhrase(phrase);
+	  std::cerr << "phrase=" << phrase << std::endl;
+
+	  std::vector<int> ids;
+	  ids = mapper->convert(phrase);
+
+	  LBLFeatures leftScores = scoreFullContexts(ids);
+	  std::vector<float> scores(2);
+	  scores[0] = leftScores.LMScore;
+	  scores[1] = leftScores.OOVScore;
+
+	  accumulator->Assign(this, scores);
+
+	  LBLLMState *state = new LBLLMState();
+	  return state;
+
   }
 
   void SetParameter(const std::string& key, const std::string& value)
diff --git a/moses/PP/Factory.cpp b/moses/PP/Factory.cpp
index 497eabaff2..4e9bfbf0ef 100644
--- a/moses/PP/Factory.cpp
+++ b/moses/PP/Factory.cpp
@@ -8,6 +8,7 @@
 #include "moses/PP/SourceLabelsPhraseProperty.h"
 #include "moses/PP/TreeStructurePhraseProperty.h"
 #include "moses/PP/SpanLengthPhraseProperty.h"
+#include "moses/PP/NonTermContextProperty.h"
 
 namespace Moses
 {
@@ -57,6 +58,7 @@ PhrasePropertyFactory::PhrasePropertyFactory()
   MOSES_PNAME2("SourceLabels", SourceLabelsPhraseProperty);
   MOSES_PNAME2("Tree",TreeStructurePhraseProperty);
   MOSES_PNAME2("SpanLength", SpanLengthPhraseProperty);
+  MOSES_PNAME2("NonTermContext", NonTermContextProperty);
 }
 
 PhrasePropertyFactory::~PhrasePropertyFactory()
diff --git a/moses/PP/NonTermContextProperty.cpp b/moses/PP/NonTermContextProperty.cpp
new file mode 100644
index 0000000000..df5e88d8e0
--- /dev/null
+++ b/moses/PP/NonTermContextProperty.cpp
@@ -0,0 +1,137 @@
+#include "moses/PP/NonTermContextProperty.h"
+#include <string>
+#include <assert.h>
+#include "moses/Util.h"
+#include "moses/FactorCollection.h"
+
+using namespace std;
+
+namespace Moses
+{
+NonTermContextProperty::NonTermContextProperty()
+{
+}
+
+NonTermContextProperty::~NonTermContextProperty()
+{
+	//RemoveAllInColl(m_probStores);
+}
+
+void NonTermContextProperty::ProcessValue(const std::string &value)
+{
+  // Token layout: numNT, then repeated records. Each record holds, for every
+  // non-terminal, its index followed by 4 context words, and ends with a count.
+  vector<string> toks;
+  Tokenize(toks, value);
+  UTIL_THROW_IF2(toks.empty(), "NonTermContextProperty: empty property value");
+
+  FactorCollection &fc = FactorCollection::Instance();
+  const size_t numNT = Scan<size_t>(toks[0]);
+  const size_t recordSize = numNT * 5 + 1;
+  m_probStores.resize(numNT);
+
+  size_t ind = 1;
+  while (ind < toks.size()) {
+    // A truncated record would otherwise index past the end of toks.
+    UTIL_THROW_IF2(toks.size() - ind < recordSize,
+                   "NonTermContextProperty: incomplete record in '" << value << "'");
+
+    vector<const Factor *> factors;
+    for (size_t nt = 0; nt < numNT; ++nt) {
+      // Checked at runtime: assert() is compiled out of NDEBUG builds.
+      const size_t ntInd = Scan<size_t>(toks[ind++]);
+      UTIL_THROW_IF2(nt != ntInd,
+                     "NonTermContextProperty: expected non-term index " << nt << ", got " << ntInd);
+
+      for (size_t contextInd = 0; contextInd < 4; ++contextInd) {
+        factors.push_back(fc.AddFactor(toks[ind++], false));
+      }
+    }
+
+    // The record's count is added once for each of its context words.
+    const float count = Scan<float>(toks[ind++]);
+
+    for (size_t i = 0; i < factors.size(); ++i) {
+      // factors is laid out as 4 consecutive context slots per non-terminal.
+      AddToMap(i / 4, i % 4, factors[i], count);
+    }
+  }
+}
+
+void NonTermContextProperty::AddToMap(size_t ntIndex, size_t index, const Factor *factor, float count)
+{
+  // Grow only: resizing when ntIndex is already in range would discard the
+  // stores of every higher-numbered non-terminal.
+  if (ntIndex >= m_probStores.size()) {
+    m_probStores.resize(ntIndex + 1);
+  }
+  m_probStores[ntIndex].AddToMap(index, factor, count);
+}
+
+float NonTermContextProperty::GetProb(size_t ntInd,
+			size_t contextInd,
+			const Factor *factor,
+			float smoothConstant) const
+{
+	// Reject an out-of-range non-terminal index, then defer to the
+	// ProbStore kept for that non-terminal.
+	UTIL_THROW_IF2(ntInd >= m_probStores.size(), "Invalid nt index=" << ntInd);
+	return m_probStores[ntInd].GetProb(contextInd, factor, smoothConstant);
+}
+
+//////////////////////////////////////////
+
+void NonTermContextProperty::ProbStore::AddToMap(size_t index, const Factor *factor, float count)
+{
+	// Accumulate the count for this word in the chosen context slot.
+	Map &slot = m_vec[index];
+	const Map::iterator found = slot.find(factor);
+	if (found != slot.end()) {
+		found->second += count;
+	}
+	else {
+		slot.insert(Map::value_type(factor, count));
+	}
+
+	// m_totalCount is one running total, whichever slot was updated.
+	m_totalCount += count;
+}
+
+
+float NonTermContextProperty::ProbStore::GetProb(size_t contextInd,
+			const Factor *factor,
+			float smoothConstant) const
+{
+  // Ratio of the smoothed count to the smoothed total.
+  const float numerator = GetCount(contextInd, factor, smoothConstant);
+  const float denominator = GetTotalCount(contextInd, smoothConstant);
+  return numerator / denominator;
+}
+
+float NonTermContextProperty::ProbStore::GetCount(size_t contextInd,
+			const Factor *factor,
+			float smoothConstant) const
+{
+	// Smoothing constant plus the count stored for this word in the given
+	// context slot, if the word has an entry there.
+	const Map &slot = m_vec[contextInd];
+	const Map::const_iterator found = slot.find(factor);
+
+	if (found == slot.end()) {
+		// no entry: only the smoothing constant
+		return smoothConstant;
+	}
+
+	const float smoothed = smoothConstant + found->second;
+	return smoothed;
+}
+
+float NonTermContextProperty::ProbStore::GetTotalCount(size_t contextInd, float smoothConstant) const
+{
+	const size_t numWords = m_vec[contextInd].size(); // distinct words in this slot
+	return m_totalCount + smoothConstant * numWords;
+}
+
+
+} // namespace Moses
+
diff --git a/moses/PP/NonTermContextProperty.h b/moses/PP/NonTermContextProperty.h
new file mode 100644
index 0000000000..56db9cb323
--- /dev/null
+++ b/moses/PP/NonTermContextProperty.h
@@ -0,0 +1,73 @@
+
+#pragma once
+
+#include "moses/PP/PhraseProperty.h"
+#include "util/exception.hh"
+#include <string>
+#include <list>
+#include <map>
+#include <vector>
+
+namespace Moses
+{
+class Factor;
+
+class NonTermContextProperty : public PhraseProperty
+{
+public:
+  // Phrase property that keeps, for each non-terminal, counts of context words.
+  NonTermContextProperty();
+  ~NonTermContextProperty();
+  // Parses the property's value string and fills m_probStores.
+  virtual void ProcessValue(const std::string &value);
+  // Not supported for this property: raises via UTIL_THROW2.
+  virtual const std::string *GetValueString() const {
+    UTIL_THROW2("NonTermContextProperty: value string not available in this phrase property");
+    return NULL;
+  };
+  // Smoothed count divided by smoothed total; raises if ntInd is out of range.
+  float GetProb(size_t ntInd,
+		  size_t contextInd,
+		  const Factor *factor,
+		  float smoothConstant) const;
+
+protected:
+  // Context word counts for a single non-terminal.
+  class ProbStore {
+	  typedef std::map<const Factor*, float> Map; // word -> accumulated count
+	  typedef std::vector<Map> Vec; // left outside, left inside, right inside, right outside
+	  Vec m_vec;
+	  float m_totalCount; // incremented by every AddToMap() call, whatever the index
+
+	  float GetCount(size_t contextInd,
+			  const Factor *factor,
+			  float smoothConstant) const;
+	  float GetTotalCount(size_t contextInd, float smoothConstant) const;
+
+  public:
+	  // Starts with 4 empty context maps and a zero total.
+	  ProbStore()
+	  :m_vec(4)
+  	  ,m_totalCount(0)
+	  {}
+
+	  float GetProb(size_t contextInd,
+			  const Factor *factor,
+			  float smoothConstant) const;
+	  // Number of distinct words stored in context slot `index`.
+	  float GetSize(size_t index) const
+	  { return m_vec[index].size(); }
+	  // Adds `count` to the entry for `factor` in slot `index` and to the total.
+	  void AddToMap(size_t index, const Factor *factor, float count);
+
+  };
+
+  // by nt index
+  std::vector<ProbStore> m_probStores;
+  // Forwards to the ProbStore of non-terminal `ntIndex`.
+  void AddToMap(size_t ntIndex, size_t index, const Factor *factor, float count);
+
+};
+
+} // namespace Moses
+
diff --git a/moses/Word.cpp b/moses/Word.cpp
index 04cbdb6a7c..b1ea77059e 100644
--- a/moses/Word.cpp
+++ b/moses/Word.cpp
@@ -139,8 +139,7 @@ CreateFromString(FactorDirection direction
 		      << " contains factor delimiter " 
 		      << StaticData::Instance().GetFactorDelimiter() 
 		      << " too many times.");
-      
-      UTIL_THROW_IF(i < factorOrder.size(),util::Exception, 
+      UTIL_THROW_IF(!isNonTerminal && i < factorOrder.size(),util::Exception,
 		    "Too few factors in string '" << str << "'.");
     }
   else

From a3bd695cd4559416ce83194bf9ab6ea83dc8c961 Mon Sep 17 00:00:00 2001
From: Hieu Hoang <hieuhoang@gmail.com>
Date: Sun, 13 Jul 2014 02:54:58 +0100
Subject: [PATCH 47/84] factor for oov is 0, not <unk> - interferes with source
 input. Add extra argument to lowercase input words or not

---
 .../training/wrappers/make-factor-brown-cluster-mkcls.perl | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/scripts/training/wrappers/make-factor-brown-cluster-mkcls.perl b/scripts/training/wrappers/make-factor-brown-cluster-mkcls.perl
index 60f341de87..13aa7f912f 100755
--- a/scripts/training/wrappers/make-factor-brown-cluster-mkcls.perl
+++ b/scripts/training/wrappers/make-factor-brown-cluster-mkcls.perl
@@ -2,7 +2,7 @@
 
 use strict;
 
-my ($cluster_file,$in,$out,$tmp) = @ARGV;
+my ($lowercase, $cluster_file,$in,$out,$tmp) = @ARGV;
 
 my $CLUSTER = &read_cluster_from_mkcls($cluster_file);
 
@@ -17,7 +17,10 @@
   s/ $//;
   my $first = 1;
   foreach my $word (split) {
-    my $cluster = defined($$CLUSTER{$word}) ? $$CLUSTER{$word} : "<unk>";
+    if ($lowercase) {
+      $word = lc($word);
+    }
+    my $cluster = defined($$CLUSTER{$word}) ? $$CLUSTER{$word} : "0";
     print OUT " " unless $first;
     print OUT $cluster;
     $first = 0;

From 0d764e89414d6844400b24ec65c06b9d5f378739 Mon Sep 17 00:00:00 2001
From: Hieu Hoang <hieuhoang@gmail.com>
Date: Sun, 13 Jul 2014 20:50:30 +0100
Subject: [PATCH 48/84] don't require oxlm

---
 contrib/other-builds/moses-chart-cmd/.cproject | 4 ----
 contrib/other-builds/moses/.cproject           | 7 +++----
 2 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/contrib/other-builds/moses-chart-cmd/.cproject b/contrib/other-builds/moses-chart-cmd/.cproject
index 848329d58e..0d720dbc27 100644
--- a/contrib/other-builds/moses-chart-cmd/.cproject
+++ b/contrib/other-builds/moses-chart-cmd/.cproject
@@ -48,7 +48,6 @@
 							<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.816413868" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug">
 								<option id="gnu.cpp.link.option.paths.330225535" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../nplm/lib&quot;"/>
-									<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../oxlm/lib&quot;"/>
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../probingPT/helpers&quot;"/>
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../DALM/lib&quot;"/>
@@ -69,9 +68,6 @@
 								</option>
 								<option id="gnu.cpp.link.option.libs.1177721357" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
 									<listOptionValue builtIn="false" value="moses"/>
-									<listOptionValue builtIn="false" value="lbl"/>
-									<listOptionValue builtIn="false" value="murmurhash"/>
-									<listOptionValue builtIn="false" value="gomp"/>
 									<listOptionValue builtIn="false" value="irstlm"/>
 									<listOptionValue builtIn="false" value="dstruct"/>
 									<listOptionValue builtIn="false" value="dalm"/>
diff --git a/contrib/other-builds/moses/.cproject b/contrib/other-builds/moses/.cproject
index e0d9928b21..c586797761 100644
--- a/contrib/other-builds/moses/.cproject
+++ b/contrib/other-builds/moses/.cproject
@@ -41,7 +41,6 @@
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../DALM/include&quot;"/>
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../DALM/darts-clone&quot;"/>
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../randlm/include/RandLM&quot;"/>
-									<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../oxlm/src&quot;"/>
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../eigen-3&quot;"/>
 									<listOptionValue builtIn="false" value="${workspace_loc}/../../"/>
 								</option>
@@ -56,7 +55,6 @@
 									<listOptionValue builtIn="false" value="LM_IRST"/>
 									<listOptionValue builtIn="false" value="LM_DALM"/>
 									<listOptionValue builtIn="false" value="LM_NPLM"/>
-									<listOptionValue builtIn="false" value="LM_LBL"/>
 									<listOptionValue builtIn="false" value="_FILE_OFFSET_BIT=64"/>
 									<listOptionValue builtIn="false" value="_LARGE_FILES"/>
 								</option>
@@ -81,10 +79,11 @@
 						</toolChain>
 					</folderInfo>
 					<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.511477442" name="Rand.h" rcbsApplicability="disable" resourcePath="LM/Rand.h" toolsToInvoke=""/>
-					<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.1211280539" name="DALMWrapper.h" rcbsApplicability="disable" resourcePath="LM/DALMWrapper.h" toolsToInvoke=""/>
 					<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.790052015" name="IRST.h" rcbsApplicability="disable" resourcePath="LM/IRST.h" toolsToInvoke=""/>
+					<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.1211280539" name="DALMWrapper.h" rcbsApplicability="disable" resourcePath="LM/DALMWrapper.h" toolsToInvoke=""/>
+					<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.957797720" name="LBLLM.h" rcbsApplicability="disable" resourcePath="LM/LBLLM.h" toolsToInvoke=""/>
 					<sourceEntries>
-						<entry excluding="LM/Rand.h|LM/Rand.cpp|TranslationModel/CompactPT|LM/NeuralLMWrapper.cpp|FF/PhraseLengthFeatureTest.cpp|PhraseLengthFeatureTest.cpp|LM/BackwardTest.cpp|LM/BackwardLMState.h|LM/BackwardLMState.cpp|LM/Backward.h|LM/Backward.cpp|FeatureVectorTest.cpp|LM/ParallelBackoff.h|LM/ParallelBackoff.cpp|src/SyntacticLanguageModelState.h|src/SyntacticLanguageModelFiles.h|src/SyntacticLanguageModel.h|src/SyntacticLanguageModel.cpp|src/LM/SRI.h|src/LM/SRI.cpp|src/LM/Rand.h|src/LM/Rand.cpp|src/LM/LDHT.h|src/LM/LDHT.cpp|SyntacticLanguageModelState.h|SyntacticLanguageModelFiles.h|SyntacticLanguageModel.h|SyntacticLanguageModel.cpp|LM/LDHT.h|LM/LDHT.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
+						<entry excluding="LM/LBLLM.h|LM/LBLLM.cpp|LM/oxlm|LM/Rand.h|LM/Rand.cpp|TranslationModel/CompactPT|LM/NeuralLMWrapper.cpp|FF/PhraseLengthFeatureTest.cpp|PhraseLengthFeatureTest.cpp|LM/BackwardTest.cpp|LM/BackwardLMState.h|LM/BackwardLMState.cpp|LM/Backward.h|LM/Backward.cpp|FeatureVectorTest.cpp|LM/ParallelBackoff.h|LM/ParallelBackoff.cpp|src/SyntacticLanguageModelState.h|src/SyntacticLanguageModelFiles.h|src/SyntacticLanguageModel.h|src/SyntacticLanguageModel.cpp|src/LM/SRI.h|src/LM/SRI.cpp|src/LM/Rand.h|src/LM/Rand.cpp|src/LM/LDHT.h|src/LM/LDHT.cpp|SyntacticLanguageModelState.h|SyntacticLanguageModelFiles.h|SyntacticLanguageModel.h|SyntacticLanguageModel.cpp|LM/LDHT.h|LM/LDHT.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
 					</sourceEntries>
 				</configuration>
 			</storageModule>

From f340ede6b58ae61d2586c9da3d32fbe6cd9bdda4 Mon Sep 17 00:00:00 2001
From: Hieu Hoang <hieuhoang@gmail.com>
Date: Mon, 14 Jul 2014 19:15:20 +0100
Subject: [PATCH 49/84] new LBLLM based on LanguageModel class

---
 contrib/other-builds/CreateOnDiskPt/.cproject |  4 ++
 contrib/other-builds/moses/.project           | 10 ++++
 moses/FF/Factory.cpp                          | 12 +++-
 moses/LM/oxlm/LBLLM2.cpp                      | 12 ++++
 moses/LM/oxlm/LBLLM2.h                        | 58 +++++++++++++++++++
 5 files changed, 93 insertions(+), 3 deletions(-)
 create mode 100644 moses/LM/oxlm/LBLLM2.cpp
 create mode 100644 moses/LM/oxlm/LBLLM2.h

diff --git a/contrib/other-builds/CreateOnDiskPt/.cproject b/contrib/other-builds/CreateOnDiskPt/.cproject
index 4c46d70f86..e114255db2 100644
--- a/contrib/other-builds/CreateOnDiskPt/.cproject
+++ b/contrib/other-builds/CreateOnDiskPt/.cproject
@@ -42,6 +42,9 @@
 								<option id="gnu.cpp.link.option.libs.1325292383" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
 									<listOptionValue builtIn="false" value="OnDiskPt"/>
 									<listOptionValue builtIn="false" value="moses"/>
+									<listOptionValue builtIn="false" value="lbl"/>
+									<listOptionValue builtIn="false" value="murmurhash"/>
+									<listOptionValue builtIn="false" value="gomp"/>
 									<listOptionValue builtIn="false" value="irstlm"/>
 									<listOptionValue builtIn="false" value="dstruct"/>
 									<listOptionValue builtIn="false" value="flm"/>
@@ -64,6 +67,7 @@
 								</option>
 								<option id="gnu.cpp.link.option.paths.815001500" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../oxlm/lib&quot;"/>
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/search/Debug&quot;"/>
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../DALM/lib&quot;"/>
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../srilm/lib/i686-m64&quot;"/>
diff --git a/contrib/other-builds/moses/.project b/contrib/other-builds/moses/.project
index 88818c1b13..9567448725 100644
--- a/contrib/other-builds/moses/.project
+++ b/contrib/other-builds/moses/.project
@@ -1981,6 +1981,16 @@
 			<type>1</type>
 			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/OSM-Feature/osmHyp.h</locationURI>
 		</link>
+		<link>
+			<name>LM/oxlm/LBLLM2.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/LM/oxlm/LBLLM2.cpp</locationURI>
+		</link>
+		<link>
+			<name>LM/oxlm/LBLLM2.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/LM/oxlm/LBLLM2.h</locationURI>
+		</link>
 		<link>
 			<name>LM/oxlm/Mapper.cpp</name>
 			<type>1</type>
diff --git a/moses/FF/Factory.cpp b/moses/FF/Factory.cpp
index eedf90e802..aad9829f3e 100644
--- a/moses/FF/Factory.cpp
+++ b/moses/FF/Factory.cpp
@@ -95,6 +95,7 @@
 
 #ifdef LM_LBL
 #include "moses/LM/LBLLM.h"
+#include "moses/LM/oxlm/LBLLM2.h"
 #endif
 
 #include "util/exception.hh"
@@ -246,9 +247,14 @@ FeatureRegistry::FeatureRegistry()
   MOSES_FNAME2("DALM", LanguageModelDALM);
 #endif
 #ifdef LM_LBL
-  MOSES_FNAME2("LBLLM-LM", LBLLM<oxlm::LM>);
-  MOSES_FNAME2("LBLLM-FactoredLM", LBLLM<oxlm::FactoredLM>);
-  MOSES_FNAME2("LBLLM-FactoredMaxentLM", LBLLM<oxlm::FactoredMaxentLM>);
+  MOSES_FNAME2("LBLLM-LM.old", LBLLM<oxlm::LM>);
+  MOSES_FNAME2("LBLLM-FactoredLM.old", LBLLM<oxlm::FactoredLM>);
+  MOSES_FNAME2("LBLLM-FactoredMaxentLM.old", LBLLM<oxlm::FactoredMaxentLM>);
+
+  MOSES_FNAME2("LBLLM-LM", LBLLM2<oxlm::LM>);
+  MOSES_FNAME2("LBLLM-FactoredLM", LBLLM2<oxlm::FactoredLM>);
+  MOSES_FNAME2("LBLLM-FactoredMaxentLM", LBLLM2<oxlm::FactoredMaxentLM>);
+
 #endif
 
   Add("KENLM", new KenFactory());
diff --git a/moses/LM/oxlm/LBLLM2.cpp b/moses/LM/oxlm/LBLLM2.cpp
new file mode 100644
index 0000000000..34dc526310
--- /dev/null
+++ b/moses/LM/oxlm/LBLLM2.cpp
@@ -0,0 +1,12 @@
+
+#include "LBLLM2.h"
+
+using namespace std;
+
+namespace Moses
+{
+
+}
+
+
+
diff --git a/moses/LM/oxlm/LBLLM2.h b/moses/LM/oxlm/LBLLM2.h
new file mode 100644
index 0000000000..e1debbeae1
--- /dev/null
+++ b/moses/LM/oxlm/LBLLM2.h
@@ -0,0 +1,58 @@
+// $Id$
+#pragma once
+
+#include <vector>
+#include "moses/LM/SingleFactor.h"
+#include "moses/FactorCollection.h"
+
+namespace Moses
+{
+
+template<class Model>
+class LBLLM2 : public LanguageModelSingleFactor
+{
+protected:
+
+public:
+	LBLLM2(const std::string &line)
+	:LanguageModelSingleFactor(line)
+	{
+		ReadParameters();
+
+		FactorCollection &factorCollection = FactorCollection::Instance();
+
+		// needed by parent language model classes. Why didn't they set these themselves?
+		m_sentenceStart = factorCollection.AddFactor(Output, m_factorType, BOS_);
+		m_sentenceStartWord[m_factorType] = m_sentenceStart;
+
+		m_sentenceEnd		= factorCollection.AddFactor(Output, m_factorType, EOS_);
+		m_sentenceEndWord[m_factorType] = m_sentenceEnd;
+	}
+
+  ~LBLLM2()
+  {}
+
+  virtual LMResult GetValue(const std::vector<const Word*> &contextFactor, State* finalState = 0) const
+  {
+    LMResult ret;
+    ret.score = contextFactor.size();
+    ret.unknown = false;
+
+    // use last word as state info
+    const Factor *factor;
+    size_t hash_value(const Factor &f);
+    if (contextFactor.size()) {
+      factor = contextFactor.back()->GetFactor(m_factorType);
+    } else {
+      factor = NULL;
+    }
+
+    (*finalState) = (State*) factor;
+
+    return ret;
+  }
+
+};
+
+
+}

From 7dfe943fb04a0769e2d55e30c0164e94c34c8240 Mon Sep 17 00:00:00 2001
From: Hieu Hoang <hieuhoang@gmail.com>
Date: Mon, 14 Jul 2014 20:40:33 +0100
Subject: [PATCH 50/84] new LBLLM based on LanguageModel class

---
 .../other-builds/moses-chart-cmd/.cproject    |  4 ++
 contrib/other-builds/moses/.cproject          |  6 ++-
 moses/LM/oxlm/LBLLM2.h                        | 47 +++++++++++++++++++
 3 files changed, 55 insertions(+), 2 deletions(-)

diff --git a/contrib/other-builds/moses-chart-cmd/.cproject b/contrib/other-builds/moses-chart-cmd/.cproject
index 0d720dbc27..848329d58e 100644
--- a/contrib/other-builds/moses-chart-cmd/.cproject
+++ b/contrib/other-builds/moses-chart-cmd/.cproject
@@ -48,6 +48,7 @@
 							<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.816413868" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug">
 								<option id="gnu.cpp.link.option.paths.330225535" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../nplm/lib&quot;"/>
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../oxlm/lib&quot;"/>
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../probingPT/helpers&quot;"/>
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../DALM/lib&quot;"/>
@@ -68,6 +69,9 @@
 								</option>
 								<option id="gnu.cpp.link.option.libs.1177721357" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
 									<listOptionValue builtIn="false" value="moses"/>
+									<listOptionValue builtIn="false" value="lbl"/>
+									<listOptionValue builtIn="false" value="murmurhash"/>
+									<listOptionValue builtIn="false" value="gomp"/>
 									<listOptionValue builtIn="false" value="irstlm"/>
 									<listOptionValue builtIn="false" value="dstruct"/>
 									<listOptionValue builtIn="false" value="dalm"/>
diff --git a/contrib/other-builds/moses/.cproject b/contrib/other-builds/moses/.cproject
index c586797761..aaffd32d21 100644
--- a/contrib/other-builds/moses/.cproject
+++ b/contrib/other-builds/moses/.cproject
@@ -42,6 +42,7 @@
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../DALM/darts-clone&quot;"/>
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../randlm/include/RandLM&quot;"/>
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../eigen-3&quot;"/>
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../oxlm/src&quot;"/>
 									<listOptionValue builtIn="false" value="${workspace_loc}/../../"/>
 								</option>
 								<option id="gnu.cpp.compiler.option.preprocessor.def.752586397" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
@@ -55,6 +56,7 @@
 									<listOptionValue builtIn="false" value="LM_IRST"/>
 									<listOptionValue builtIn="false" value="LM_DALM"/>
 									<listOptionValue builtIn="false" value="LM_NPLM"/>
+									<listOptionValue builtIn="false" value="LM_LBL"/>
 									<listOptionValue builtIn="false" value="_FILE_OFFSET_BIT=64"/>
 									<listOptionValue builtIn="false" value="_LARGE_FILES"/>
 								</option>
@@ -79,11 +81,11 @@
 						</toolChain>
 					</folderInfo>
 					<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.511477442" name="Rand.h" rcbsApplicability="disable" resourcePath="LM/Rand.h" toolsToInvoke=""/>
-					<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.790052015" name="IRST.h" rcbsApplicability="disable" resourcePath="LM/IRST.h" toolsToInvoke=""/>
 					<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.1211280539" name="DALMWrapper.h" rcbsApplicability="disable" resourcePath="LM/DALMWrapper.h" toolsToInvoke=""/>
+					<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.790052015" name="IRST.h" rcbsApplicability="disable" resourcePath="LM/IRST.h" toolsToInvoke=""/>
 					<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.957797720" name="LBLLM.h" rcbsApplicability="disable" resourcePath="LM/LBLLM.h" toolsToInvoke=""/>
 					<sourceEntries>
-						<entry excluding="LM/LBLLM.h|LM/LBLLM.cpp|LM/oxlm|LM/Rand.h|LM/Rand.cpp|TranslationModel/CompactPT|LM/NeuralLMWrapper.cpp|FF/PhraseLengthFeatureTest.cpp|PhraseLengthFeatureTest.cpp|LM/BackwardTest.cpp|LM/BackwardLMState.h|LM/BackwardLMState.cpp|LM/Backward.h|LM/Backward.cpp|FeatureVectorTest.cpp|LM/ParallelBackoff.h|LM/ParallelBackoff.cpp|src/SyntacticLanguageModelState.h|src/SyntacticLanguageModelFiles.h|src/SyntacticLanguageModel.h|src/SyntacticLanguageModel.cpp|src/LM/SRI.h|src/LM/SRI.cpp|src/LM/Rand.h|src/LM/Rand.cpp|src/LM/LDHT.h|src/LM/LDHT.cpp|SyntacticLanguageModelState.h|SyntacticLanguageModelFiles.h|SyntacticLanguageModel.h|SyntacticLanguageModel.cpp|LM/LDHT.h|LM/LDHT.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
+						<entry excluding="LM/Rand.h|LM/Rand.cpp|TranslationModel/CompactPT|LM/NeuralLMWrapper.cpp|FF/PhraseLengthFeatureTest.cpp|PhraseLengthFeatureTest.cpp|LM/BackwardTest.cpp|LM/BackwardLMState.h|LM/BackwardLMState.cpp|LM/Backward.h|LM/Backward.cpp|FeatureVectorTest.cpp|LM/ParallelBackoff.h|LM/ParallelBackoff.cpp|src/SyntacticLanguageModelState.h|src/SyntacticLanguageModelFiles.h|src/SyntacticLanguageModel.h|src/SyntacticLanguageModel.cpp|src/LM/SRI.h|src/LM/SRI.cpp|src/LM/Rand.h|src/LM/Rand.cpp|src/LM/LDHT.h|src/LM/LDHT.cpp|SyntacticLanguageModelState.h|SyntacticLanguageModelFiles.h|SyntacticLanguageModel.h|SyntacticLanguageModel.cpp|LM/LDHT.h|LM/LDHT.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
 					</sourceEntries>
 				</configuration>
 			</storageModule>
diff --git a/moses/LM/oxlm/LBLLM2.h b/moses/LM/oxlm/LBLLM2.h
index e1debbeae1..9432187534 100644
--- a/moses/LM/oxlm/LBLLM2.h
+++ b/moses/LM/oxlm/LBLLM2.h
@@ -5,6 +5,19 @@
 #include "moses/LM/SingleFactor.h"
 #include "moses/FactorCollection.h"
 
+// lbl stuff
+#include "corpus/corpus.h"
+#include "lbl/lbl_features.h"
+#include "lbl/model.h"
+#include "lbl/process_identifier.h"
+#include "lbl/query_cache.h"
+
+#include "lbl/cdec_lbl_mapper.h"
+#include "lbl/cdec_rule_converter.h"
+#include "lbl/cdec_state_converter.h"
+
+#include "Mapper.h"
+
 namespace Moses
 {
 
@@ -32,6 +45,30 @@ class LBLLM2 : public LanguageModelSingleFactor
   ~LBLLM2()
   {}
 
+  void Load()
+  {
+    model.load(m_path);
+
+    config = model.getConfig();
+    int context_width = config->ngram_order - 1;
+    // For each state, we store at most context_width word ids to the left and
+    // to the right and a kSTAR separator. The last bit represents the actual
+    // size of the state.
+    //int max_state_size = (2 * context_width + 1) * sizeof(int) + 1;
+    //FeatureFunction::SetStateSize(max_state_size);
+
+    dict = model.getDict();
+    mapper = boost::make_shared<OXLMMapper>(dict);
+    //stateConverter = boost::make_shared<CdecStateConverter>(max_state_size - 1);
+    //ruleConverter = boost::make_shared<CdecRuleConverter>(mapper, stateConverter);
+
+    kSTART = dict.Convert("<s>");
+    kSTOP = dict.Convert("</s>");
+    kUNKNOWN = dict.Convert("<unk>");
+    kSTAR = dict.Convert("<{STAR}>");
+  }
+
+
   virtual LMResult GetValue(const std::vector<const Word*> &contextFactor, State* finalState = 0) const
   {
     LMResult ret;
@@ -52,6 +89,16 @@ class LBLLM2 : public LanguageModelSingleFactor
     return ret;
   }
 
+protected:
+  oxlm::Dict dict;
+  boost::shared_ptr<oxlm::ModelData> config;
+  Model model;
+
+  int kSTART;
+  int kSTOP;
+  int kUNKNOWN;
+  int kSTAR;
+
 };
 
 

From dc1cf603de830778ba483ceb49400e414df50f94 Mon Sep 17 00:00:00 2001
From: Hieu Hoang <hieuhoang@gmail.com>
Date: Mon, 14 Jul 2014 21:33:38 +0100
Subject: [PATCH 51/84] new LBLLM based on LanguageModel class

---
 moses/LM/LBLLM.h         | 16 ----------
 moses/LM/oxlm/LBLLM2.h   | 64 +++++++++++++++++++++++++++++++---------
 moses/LM/oxlm/Mapper.cpp | 14 +++++++++
 moses/LM/oxlm/Mapper.h   | 18 +++++++++++
 4 files changed, 82 insertions(+), 30 deletions(-)

diff --git a/moses/LM/LBLLM.h b/moses/LM/LBLLM.h
index f1fc6eaafe..6a940d8147 100644
--- a/moses/LM/LBLLM.h
+++ b/moses/LM/LBLLM.h
@@ -38,22 +38,6 @@ class LBLLMState : public FFState
   int Compare(const FFState& other) const;
 };
 
-/**
- * Wraps the feature values computed from the LBL language model.
- */
-struct LBLFeatures {
-  LBLFeatures() : LMScore(0), OOVScore(0) {}
-  LBLFeatures(double lm_score, double oov_score)
-      : LMScore(lm_score), OOVScore(oov_score) {}
-  LBLFeatures& operator+=(const LBLFeatures& other) {
-    LMScore += other.LMScore;
-    OOVScore += other.OOVScore;
-    return *this;
-  }
-
-  double LMScore;
-  double OOVScore;
-};
 
 // FF class
 template<class Model>
diff --git a/moses/LM/oxlm/LBLLM2.h b/moses/LM/oxlm/LBLLM2.h
index 9432187534..5b6e288a0b 100644
--- a/moses/LM/oxlm/LBLLM2.h
+++ b/moses/LM/oxlm/LBLLM2.h
@@ -21,6 +21,7 @@
 namespace Moses
 {
 
+
 template<class Model>
 class LBLLM2 : public LanguageModelSingleFactor
 {
@@ -47,7 +48,7 @@ class LBLLM2 : public LanguageModelSingleFactor
 
   void Load()
   {
-    model.load(m_path);
+    model.load(m_filePath);
 
     config = model.getConfig();
     int context_width = config->ngram_order - 1;
@@ -71,21 +72,18 @@ class LBLLM2 : public LanguageModelSingleFactor
 
   virtual LMResult GetValue(const std::vector<const Word*> &contextFactor, State* finalState = 0) const
   {
-    LMResult ret;
-    ret.score = contextFactor.size();
-    ret.unknown = false;
-
-    // use last word as state info
-    const Factor *factor;
-    size_t hash_value(const Factor &f);
-    if (contextFactor.size()) {
-      factor = contextFactor.back()->GetFactor(m_factorType);
-    } else {
-      factor = NULL;
-    }
+    std::vector<int> ids;
+    ids = mapper->convert(contextFactor);
+    int word = ids.back();
+
+    double score;
+    score = model.predict(word, ids);
 
-    (*finalState) = (State*) factor;
+    LMResult ret;
+    ret.score = score;
+    ret.unknown = (word == kUNKNOWN);
 
+    (*finalState) = (State*) 0;
     return ret;
   }
 
@@ -99,6 +97,44 @@ class LBLLM2 : public LanguageModelSingleFactor
   int kUNKNOWN;
   int kSTAR;
 
+  boost::shared_ptr<OXLMMapper> mapper;
+
+  ////////////////////////////////////
+  LBLFeatures scoreFullContexts(const vector<int>& symbols) const {
+    LBLFeatures ret;
+    int last_star = -1;
+    int context_width = config->ngram_order - 1;
+    for (size_t i = 0; i < symbols.size(); ++i) {
+      if (symbols[i] == kSTAR) {
+        last_star = i;
+      } else if (i - last_star > context_width) {
+        ret += scoreContext(symbols, i);
+      }
+    }
+
+    return ret;
+  }
+
+  LBLFeatures scoreContext(const vector<int>& symbols, int position) const {
+    int word = symbols[position];
+    int context_width = config->ngram_order - 1;
+    vector<int> context;
+    for (int i = 1; i <= context_width && position - i >= 0; ++i) {
+      assert(symbols[position - i] != kSTAR);
+      context.push_back(symbols[position - i]);
+    }
+
+    if (!context.empty() && context.back() == kSTART) {
+      context.resize(context_width, kSTART);
+    } else {
+      context.resize(context_width, kUNKNOWN);
+    }
+
+    double score;
+    score = model.predict(word, context);
+    return LBLFeatures(score, word == kUNKNOWN);
+  }
+
 };
 
 
diff --git a/moses/LM/oxlm/Mapper.cpp b/moses/LM/oxlm/Mapper.cpp
index 09407816d3..d2bbd795b4 100644
--- a/moses/LM/oxlm/Mapper.cpp
+++ b/moses/LM/oxlm/Mapper.cpp
@@ -45,5 +45,19 @@ std::vector<int> OXLMMapper::convert(const Phrase &phrase) const
 	return ret;
 }
 
+std::vector<int> OXLMMapper::convert(const std::vector<const Word*> &contextFactor) const
+{
+	size_t size = contextFactor.size();
+	vector<int> ret(size);
+
+	for (size_t i = 0; i < size; ++i) {
+		const Moses::Factor *factor = contextFactor[i]->GetFactor(0);
+		int id = convert(factor);
+		ret[i] = id;
+	}
+	return ret;
+
+}
+
 } // namespace
 
diff --git a/moses/LM/oxlm/Mapper.h b/moses/LM/oxlm/Mapper.h
index b77bbff120..15367cf72a 100644
--- a/moses/LM/oxlm/Mapper.h
+++ b/moses/LM/oxlm/Mapper.h
@@ -14,6 +14,7 @@ class OXLMMapper
 
  int convert(const Moses::Factor *factor) const;
  std::vector<int> convert(const Phrase &phrase) const;
+ std::vector<int> convert(const std::vector<const Word*> &contextFactor) const;
 
 private:
  void add(int lbl_id, int cdec_id);
@@ -25,4 +26,21 @@ class OXLMMapper
 
 };
 
+/**
+ * Wraps the feature values computed from the LBL language model.
+ */
+struct LBLFeatures {
+  LBLFeatures() : LMScore(0), OOVScore(0) {}
+  LBLFeatures(double lm_score, double oov_score)
+      : LMScore(lm_score), OOVScore(oov_score) {}
+  LBLFeatures& operator+=(const LBLFeatures& other) {
+    LMScore += other.LMScore;
+    OOVScore += other.OOVScore;
+    return *this;
+  }
+
+  double LMScore;
+  double OOVScore;
+};
+
 }

From 20271b8e963605edfd9bbda4743bf04afeb426eb Mon Sep 17 00:00:00 2001
From: Hieu Hoang <hieuhoang@gmail.com>
Date: Mon, 14 Jul 2014 21:49:29 +0100
Subject: [PATCH 52/84] sort FF by name

---
 moses/FF/Factory.cpp | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/moses/FF/Factory.cpp b/moses/FF/Factory.cpp
index eedf90e802..97aac07e9c 100644
--- a/moses/FF/Factory.cpp
+++ b/moses/FF/Factory.cpp
@@ -278,12 +278,21 @@ void FeatureRegistry::Construct(const std::string &name, const std::string &line
 
 void FeatureRegistry::PrintFF() const
 {
+	vector<string> ffs;
 	std::cerr << "Available feature functions:" << std::endl;
 	Map::const_iterator iter;
 	for (iter = registry_.begin(); iter != registry_.end(); ++iter) {
 		const string &ffName = iter->first;
+		ffs.push_back(ffName);
+	}
+
+	vector<string>::const_iterator iterVec;
+	std::sort(ffs.begin(), ffs.end());
+	for (iterVec = ffs.begin(); iterVec != ffs.end(); ++iterVec) {
+		const string &ffName = *iterVec;
 		std::cerr << ffName << " ";
 	}
+
 	std::cerr << std::endl;
 }
 

From 08be3b6b4f63a556f315f3a1da27b59709fb087d Mon Sep 17 00:00:00 2001
From: Hieu Hoang <hieuhoang@gmail.com>
Date: Tue, 15 Jul 2014 00:13:35 +0100
Subject: [PATCH 53/84] new LBLLM based on LanguageModel class

---
 moses/LM/oxlm/LBLLM2.h | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/moses/LM/oxlm/LBLLM2.h b/moses/LM/oxlm/LBLLM2.h
index 5b6e288a0b..4999d92d88 100644
--- a/moses/LM/oxlm/LBLLM2.h
+++ b/moses/LM/oxlm/LBLLM2.h
@@ -12,10 +12,6 @@
 #include "lbl/process_identifier.h"
 #include "lbl/query_cache.h"
 
-#include "lbl/cdec_lbl_mapper.h"
-#include "lbl/cdec_rule_converter.h"
-#include "lbl/cdec_state_converter.h"
-
 #include "Mapper.h"
 
 namespace Moses
@@ -76,6 +72,9 @@ class LBLLM2 : public LanguageModelSingleFactor
     ids = mapper->convert(contextFactor);
     int word = ids.back();
 
+    size_t context_width = m_nGramOrder - 1;
+    ids.resize(context_width, kUNKNOWN);
+
     double score;
     score = model.predict(word, ids);
 

From 53073726d6ab281436606503f043a1530d933c06 Mon Sep 17 00:00:00 2001
From: Hieu Hoang <hieuhoang@gmail.com>
Date: Tue, 15 Jul 2014 15:26:50 +0100
Subject: [PATCH 54/84] compiles with bjam

---
 Jamroot                   |  7 +++++++
 moses/Jamfile             |  9 ++++++++-
 moses/LM/Jamfile          |  9 ++++++---
 moses/LM/SingleFactor.cpp | 11 +++++++++++
 moses/LM/SingleFactor.h   |  2 ++
 moses/LM/oxlm/LBLLM2.h    |  6 ++++++
 6 files changed, 40 insertions(+), 4 deletions(-)

diff --git a/Jamroot b/Jamroot
index 283b4dd6f9..0a146c5287 100644
--- a/Jamroot
+++ b/Jamroot
@@ -114,6 +114,13 @@ requirements += [ option.get "with-mm" : : <define>PT_UG ] ;
 requirements += [ option.get "with-mm" : : <define>MAX_NUM_FACTORS=4 ] ;
 requirements += [ option.get "unlabelled-source" : : <define>UNLABELLED_SOURCE ] ;
 
+if [ option.get "with-lbllm" ] {
+  external-lib boost_serialization ;
+  external-lib gomp ;
+  requirements += <library>boost_serialization ;
+  requirements += <library>gomp ;
+}
+
 if [ option.get "with-cmph" ] {
   requirements += <define>HAVE_CMPH ;
 }
diff --git a/moses/Jamfile b/moses/Jamfile
index 011060943d..60ab877b0a 100644
--- a/moses/Jamfile
+++ b/moses/Jamfile
@@ -10,7 +10,14 @@ if $(with-dlib) {
   dlib = ;
 }
 
-alias headers : ../util//kenutil : : : $(max-factors) $(dlib) ; 
+with-lbllm = [ option.get "with-lbllm" ] ;
+if $(with-lbllm) {
+  lbllm2 = <cxxflags>-std=c++0x <define>LM_LBL <include>$(with-lbllm)/src <include>$(with-lbllm)/3rdparty/eigen-3 ;
+} else {
+  lbllm2 = ;
+}
+
+alias headers : ../util//kenutil : : : $(max-factors) $(dlib) $(lbllm2) ; 
 alias ThreadPool : ThreadPool.cpp ;
 alias Util : Util.cpp Timer.cpp ;
 
diff --git a/moses/LM/Jamfile b/moses/LM/Jamfile
index 33f85d7596..5ab846e6d6 100644
--- a/moses/LM/Jamfile
+++ b/moses/LM/Jamfile
@@ -93,13 +93,16 @@ if $(with-nplm) {
 #LBLLM
 local with-lbllm = [ option.get "with-lbllm" ] ;
 if $(with-lbllm) {
-  lib lblLM : : <search>$(with-lbllm)/lib <search>$(with-lbllm)/lib64 ;
-  obj LBLLM.o : LBLLM.cpp lblLM ..//headers : <include>$(with-lbllm)/src <include>$(with-lbllm)/3rdparty/eigen-3 ;
-  alias lbllm : LBLLM.o lblLM : : : <cxxflags>-std=c++0x <linkflags>-std=c++0x <define>LM_LBL ;
+  lib lbl : : <search>$(with-lbllm)/lib <search>$(with-lbllm)/lib64 ;
+  obj LBLLM.o : LBLLM.cpp lbl ..//headers : <include>$(with-lbllm)/src <include>$(with-lbllm)/3rdparty/eigen-3 ;
+  obj LBLLM2.o : oxlm/LBLLM2.cpp lbl ..//headers : <include>$(with-lbllm)/src <include>$(with-lbllm)/3rdparty/eigen-3 ;
+  obj Mapper.o : oxlm/Mapper.cpp lbl ..//headers : <include>$(with-lbllm)/src <include>$(with-lbllm)/3rdparty/eigen-3 ;
+  alias lbllm : LBLLM.o LBLLM2.o Mapper.o lbl : : : <cxxflags>-std=c++0x <define>LM_LBL ;
   dependencies += lbllm ;
   lmmacros += LM_LBL ;
 }
 
+
 #DALM
 local with-dalm = [ option.get "with-dalm" ] ;
 if $(with-dalm) {
diff --git a/moses/LM/SingleFactor.cpp b/moses/LM/SingleFactor.cpp
index 74b8f4fe5b..1efb13f16e 100644
--- a/moses/LM/SingleFactor.cpp
+++ b/moses/LM/SingleFactor.cpp
@@ -87,6 +87,17 @@ void LanguageModelSingleFactor::SetParameter(const std::string& key, const std::
   }
 }
 
+std::string LanguageModelSingleFactor::DebugContextFactor(const std::vector<const Word*> &contextFactor) const
+{
+	std::string ret;
+	for (size_t i = 0; i < contextFactor.size(); ++i) {
+		const Word &word = *contextFactor[i];
+		ret += word.ToString();
+	}
+
+	return ret;
+}
+
 }
 
 
diff --git a/moses/LM/SingleFactor.h b/moses/LM/SingleFactor.h
index eeb5cdbef5..fd1d893e6a 100644
--- a/moses/LM/SingleFactor.h
+++ b/moses/LM/SingleFactor.h
@@ -67,6 +67,8 @@ class LanguageModelSingleFactor : public LanguageModelImplementation
   virtual LMResult GetValueForgotState(const std::vector<const Word*> &contextFactor, FFState &outState) const;
 
   virtual LMResult GetValue(const std::vector<const Word*> &contextFactor, State* finalState = NULL) const = 0;
+
+  std::string DebugContextFactor(const std::vector<const Word*> &contextFactor) const;
 };
 
 
diff --git a/moses/LM/oxlm/LBLLM2.h b/moses/LM/oxlm/LBLLM2.h
index 4999d92d88..8942ab13bd 100644
--- a/moses/LM/oxlm/LBLLM2.h
+++ b/moses/LM/oxlm/LBLLM2.h
@@ -68,6 +68,11 @@ class LBLLM2 : public LanguageModelSingleFactor
 
   virtual LMResult GetValue(const std::vector<const Word*> &contextFactor, State* finalState = 0) const
   {
+	std::string str = DebugContextFactor(contextFactor);
+	if (str == "unterstützende ") {
+		std::cerr << str << std::endl;
+	}
+
     std::vector<int> ids;
     ids = mapper->convert(contextFactor);
     int word = ids.back();
@@ -77,6 +82,7 @@ class LBLLM2 : public LanguageModelSingleFactor
 
     double score;
     score = model.predict(word, ids);
+	std::cerr << "contextFactor=" << str << " " << score << std::endl;
 
     LMResult ret;
     ret.score = score;

From 7cb8ef59330390d9896615cacd43e69ecb1a8556 Mon Sep 17 00:00:00 2001
From: Hieu Hoang <hieuhoang@gmail.com>
Date: Tue, 15 Jul 2014 16:46:19 +0100
Subject: [PATCH 55/84] scores match for 1st best, but not n-best

---
 moses/LM/oxlm/LBLLM2.h   | 61 +++++++++-------------------------------
 moses/LM/oxlm/Mapper.cpp | 14 +++++----
 moses/LM/oxlm/Mapper.h   |  2 +-
 3 files changed, 24 insertions(+), 53 deletions(-)

diff --git a/moses/LM/oxlm/LBLLM2.h b/moses/LM/oxlm/LBLLM2.h
index 8942ab13bd..7346788e49 100644
--- a/moses/LM/oxlm/LBLLM2.h
+++ b/moses/LM/oxlm/LBLLM2.h
@@ -68,21 +68,24 @@ class LBLLM2 : public LanguageModelSingleFactor
 
   virtual LMResult GetValue(const std::vector<const Word*> &contextFactor, State* finalState = 0) const
   {
-	std::string str = DebugContextFactor(contextFactor);
-	if (str == "unterstützende ") {
-		std::cerr << str << std::endl;
-	}
-
-    std::vector<int> ids;
-    ids = mapper->convert(contextFactor);
-    int word = ids.back();
+    std::vector<int> context;
+    int word;
+    mapper->convert(contextFactor, context, word);
 
     size_t context_width = m_nGramOrder - 1;
-    ids.resize(context_width, kUNKNOWN);
+
+    if (!context.empty() && context.back() == kSTART) {
+      context.resize(context_width, kSTART);
+    } else {
+      context.resize(context_width, kUNKNOWN);
+    }
+
 
     double score;
-    score = model.predict(word, ids);
-	std::cerr << "contextFactor=" << str << " " << score << std::endl;
+    score = model.predict(word, context);
+
+	std::string str = DebugContextFactor(contextFactor);
+    std::cerr << "contextFactor=" << str << " " << score << std::endl;
 
     LMResult ret;
     ret.score = score;
@@ -104,42 +107,6 @@ class LBLLM2 : public LanguageModelSingleFactor
 
   boost::shared_ptr<OXLMMapper> mapper;
 
-  ////////////////////////////////////
-  LBLFeatures scoreFullContexts(const vector<int>& symbols) const {
-    LBLFeatures ret;
-    int last_star = -1;
-    int context_width = config->ngram_order - 1;
-    for (size_t i = 0; i < symbols.size(); ++i) {
-      if (symbols[i] == kSTAR) {
-        last_star = i;
-      } else if (i - last_star > context_width) {
-        ret += scoreContext(symbols, i);
-      }
-    }
-
-    return ret;
-  }
-
-  LBLFeatures scoreContext(const vector<int>& symbols, int position) const {
-    int word = symbols[position];
-    int context_width = config->ngram_order - 1;
-    vector<int> context;
-    for (int i = 1; i <= context_width && position - i >= 0; ++i) {
-      assert(symbols[position - i] != kSTAR);
-      context.push_back(symbols[position - i]);
-    }
-
-    if (!context.empty() && context.back() == kSTART) {
-      context.resize(context_width, kSTART);
-    } else {
-      context.resize(context_width, kUNKNOWN);
-    }
-
-    double score;
-    score = model.predict(word, context);
-    return LBLFeatures(score, word == kUNKNOWN);
-  }
-
 };
 
 
diff --git a/moses/LM/oxlm/Mapper.cpp b/moses/LM/oxlm/Mapper.cpp
index d2bbd795b4..f1363ccf0c 100644
--- a/moses/LM/oxlm/Mapper.cpp
+++ b/moses/LM/oxlm/Mapper.cpp
@@ -45,17 +45,21 @@ std::vector<int> OXLMMapper::convert(const Phrase &phrase) const
 	return ret;
 }
 
-std::vector<int> OXLMMapper::convert(const std::vector<const Word*> &contextFactor) const
+void OXLMMapper::convert(const std::vector<const Word*> &contextFactor, std::vector<int> &ids, int &word) const
 {
 	size_t size = contextFactor.size();
-	vector<int> ret(size);
 
-	for (size_t i = 0; i < size; ++i) {
+	ids.resize(size - 1);
+
+	for (size_t i = 0; i < size - 1; ++i) {
 		const Moses::Factor *factor = contextFactor[i]->GetFactor(0);
 		int id = convert(factor);
-		ret[i] = id;
+		ids[i] = id;
 	}
-	return ret;
+	std::reverse(ids.begin(), ids.end());
+
+	const Moses::Factor *factor = contextFactor.back()->GetFactor(0);
+	word = convert(factor);
 
 }
 
diff --git a/moses/LM/oxlm/Mapper.h b/moses/LM/oxlm/Mapper.h
index 15367cf72a..79cbf7b5f8 100644
--- a/moses/LM/oxlm/Mapper.h
+++ b/moses/LM/oxlm/Mapper.h
@@ -14,7 +14,7 @@ class OXLMMapper
 
  int convert(const Moses::Factor *factor) const;
  std::vector<int> convert(const Phrase &phrase) const;
- std::vector<int> convert(const std::vector<const Word*> &contextFactor) const;
+ void convert(const std::vector<const Word*> &contextFactor, std::vector<int> &ids, int &word) const;
 
 private:
  void add(int lbl_id, int cdec_id);

From 3b0ab6e6de466dfb30ba1332faf999c8fa2c86b4 Mon Sep 17 00:00:00 2001
From: Hieu Hoang <hieuhoang@gmail.com>
Date: Tue, 15 Jul 2014 17:14:32 +0100
Subject: [PATCH 56/84] scores match. State info from hash

---
 moses/LM/oxlm/LBLLM2.h | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/moses/LM/oxlm/LBLLM2.h b/moses/LM/oxlm/LBLLM2.h
index 7346788e49..4026b28c6b 100644
--- a/moses/LM/oxlm/LBLLM2.h
+++ b/moses/LM/oxlm/LBLLM2.h
@@ -2,6 +2,7 @@
 #pragma once
 
 #include <vector>
+#include <boost/functional/hash.hpp>
 #include "moses/LM/SingleFactor.h"
 #include "moses/FactorCollection.h"
 
@@ -91,7 +92,15 @@ class LBLLM2 : public LanguageModelSingleFactor
     ret.score = score;
     ret.unknown = (word == kUNKNOWN);
 
-    (*finalState) = (State*) 0;
+    // calc state from hash of last n-1 words
+    size_t seed = 0;
+    boost::hash_combine(seed, word);
+    for (size_t i = 0; i < context.size() && i < context_width - 1; ++i) {
+    	int id = context[i];
+    	boost::hash_combine(seed, id);
+    }
+    // finalState defaults to NULL in the signature: only write the state back if supplied
+    if (finalState) (*finalState) = (State*) seed;
     return ret;
   }
 

From ce014e0b35bf136287ebb1757434cbbab50e6d0c Mon Sep 17 00:00:00 2001
From: Hieu Hoang <hieuhoang@gmail.com>
Date: Tue, 15 Jul 2014 17:47:36 +0100
Subject: [PATCH 57/84] cleanup

---
 contrib/other-builds/moses/.cproject |   1 -
 contrib/other-builds/moses/.project  |  10 --
 moses/FF/Factory.cpp                 |   6 -
 moses/LM/Jamfile                     |   3 +-
 moses/LM/LBLLM.cpp                   |  28 ----
 moses/LM/LBLLM.h                     | 232 ---------------------------
 moses/LM/oxlm/LBLLM2.h               |   4 +-
 7 files changed, 3 insertions(+), 281 deletions(-)
 delete mode 100644 moses/LM/LBLLM.cpp
 delete mode 100644 moses/LM/LBLLM.h

diff --git a/contrib/other-builds/moses/.cproject b/contrib/other-builds/moses/.cproject
index aaffd32d21..69681a1367 100644
--- a/contrib/other-builds/moses/.cproject
+++ b/contrib/other-builds/moses/.cproject
@@ -83,7 +83,6 @@
 					<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.511477442" name="Rand.h" rcbsApplicability="disable" resourcePath="LM/Rand.h" toolsToInvoke=""/>
 					<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.1211280539" name="DALMWrapper.h" rcbsApplicability="disable" resourcePath="LM/DALMWrapper.h" toolsToInvoke=""/>
 					<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.790052015" name="IRST.h" rcbsApplicability="disable" resourcePath="LM/IRST.h" toolsToInvoke=""/>
-					<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.957797720" name="LBLLM.h" rcbsApplicability="disable" resourcePath="LM/LBLLM.h" toolsToInvoke=""/>
 					<sourceEntries>
 						<entry excluding="LM/Rand.h|LM/Rand.cpp|TranslationModel/CompactPT|LM/NeuralLMWrapper.cpp|FF/PhraseLengthFeatureTest.cpp|PhraseLengthFeatureTest.cpp|LM/BackwardTest.cpp|LM/BackwardLMState.h|LM/BackwardLMState.cpp|LM/Backward.h|LM/Backward.cpp|FeatureVectorTest.cpp|LM/ParallelBackoff.h|LM/ParallelBackoff.cpp|src/SyntacticLanguageModelState.h|src/SyntacticLanguageModelFiles.h|src/SyntacticLanguageModel.h|src/SyntacticLanguageModel.cpp|src/LM/SRI.h|src/LM/SRI.cpp|src/LM/Rand.h|src/LM/Rand.cpp|src/LM/LDHT.h|src/LM/LDHT.cpp|SyntacticLanguageModelState.h|SyntacticLanguageModelFiles.h|SyntacticLanguageModel.h|SyntacticLanguageModel.cpp|LM/LDHT.h|LM/LDHT.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
 					</sourceEntries>
diff --git a/contrib/other-builds/moses/.project b/contrib/other-builds/moses/.project
index 9567448725..be2574de77 100644
--- a/contrib/other-builds/moses/.project
+++ b/contrib/other-builds/moses/.project
@@ -1526,16 +1526,6 @@
 			<type>1</type>
 			<locationURI>PARENT-3-PROJECT_LOC/moses/LM/Ken.h</locationURI>
 		</link>
-		<link>
-			<name>LM/LBLLM.cpp</name>
-			<type>1</type>
-			<locationURI>PARENT-3-PROJECT_LOC/moses/LM/LBLLM.cpp</locationURI>
-		</link>
-		<link>
-			<name>LM/LBLLM.h</name>
-			<type>1</type>
-			<locationURI>PARENT-3-PROJECT_LOC/moses/LM/LBLLM.h</locationURI>
-		</link>
 		<link>
 			<name>LM/LDHT.cpp</name>
 			<type>1</type>
diff --git a/moses/FF/Factory.cpp b/moses/FF/Factory.cpp
index 64efe77b27..0472ce1d3c 100644
--- a/moses/FF/Factory.cpp
+++ b/moses/FF/Factory.cpp
@@ -94,7 +94,6 @@
 #endif
 
 #ifdef LM_LBL
-#include "moses/LM/LBLLM.h"
 #include "moses/LM/oxlm/LBLLM2.h"
 #endif
 
@@ -247,14 +246,9 @@ FeatureRegistry::FeatureRegistry()
   MOSES_FNAME2("DALM", LanguageModelDALM);
 #endif
 #ifdef LM_LBL
-  MOSES_FNAME2("LBLLM-LM.old", LBLLM<oxlm::LM>);
-  MOSES_FNAME2("LBLLM-FactoredLM.old", LBLLM<oxlm::FactoredLM>);
-  MOSES_FNAME2("LBLLM-FactoredMaxentLM.old", LBLLM<oxlm::FactoredMaxentLM>);
-
   MOSES_FNAME2("LBLLM-LM", LBLLM2<oxlm::LM>);
   MOSES_FNAME2("LBLLM-FactoredLM", LBLLM2<oxlm::FactoredLM>);
   MOSES_FNAME2("LBLLM-FactoredMaxentLM", LBLLM2<oxlm::FactoredMaxentLM>);
-
 #endif
 
   Add("KENLM", new KenFactory());
diff --git a/moses/LM/Jamfile b/moses/LM/Jamfile
index 5ab846e6d6..8b0bc5af4a 100644
--- a/moses/LM/Jamfile
+++ b/moses/LM/Jamfile
@@ -94,10 +94,9 @@ if $(with-nplm) {
 local with-lbllm = [ option.get "with-lbllm" ] ;
 if $(with-lbllm) {
   lib lbl : : <search>$(with-lbllm)/lib <search>$(with-lbllm)/lib64 ;
-  obj LBLLM.o : LBLLM.cpp lbl ..//headers : <include>$(with-lbllm)/src <include>$(with-lbllm)/3rdparty/eigen-3 ;
   obj LBLLM2.o : oxlm/LBLLM2.cpp lbl ..//headers : <include>$(with-lbllm)/src <include>$(with-lbllm)/3rdparty/eigen-3 ;
   obj Mapper.o : oxlm/Mapper.cpp lbl ..//headers : <include>$(with-lbllm)/src <include>$(with-lbllm)/3rdparty/eigen-3 ;
-  alias lbllm : LBLLM.o LBLLM2.o Mapper.o lbl : : : <cxxflags>-std=c++0x <define>LM_LBL ;
+  alias lbllm : LBLLM2.o Mapper.o lbl : : : <cxxflags>-std=c++0x <define>LM_LBL ;
   dependencies += lbllm ;
   lmmacros += LM_LBL ;
 }
diff --git a/moses/LM/LBLLM.cpp b/moses/LM/LBLLM.cpp
deleted file mode 100644
index bfa7635b83..0000000000
--- a/moses/LM/LBLLM.cpp
+++ /dev/null
@@ -1,28 +0,0 @@
-#include <vector>
-#include <boost/archive/text_iarchive.hpp>
-#include "LBLLM.h"
-#include "moses/ScoreComponentCollection.h"
-#include "moses/Hypothesis.h"
-
-using namespace std;
-
-namespace Moses
-{
-int LBLLMState::Compare(const FFState& other) const
-{
-  const LBLLMState &otherState = static_cast<const LBLLMState&>(other);
-
-  if (m_left != otherState.m_left) {
-	  return (m_left < otherState.m_left) ? -1 : +1;
-  }
-  else if (m_right != otherState.m_right) {
-	  return (m_right < otherState.m_right) ? -1 : +1;
-  }
-  return 0;
-}
-
-////////////////////////////////////////////////////////////////
-
-
-}
-
diff --git a/moses/LM/LBLLM.h b/moses/LM/LBLLM.h
deleted file mode 100644
index 6a940d8147..0000000000
--- a/moses/LM/LBLLM.h
+++ /dev/null
@@ -1,232 +0,0 @@
-#pragma once
-
-#include <string>
-#include <boost/shared_ptr.hpp>
-#include "moses/FF/StatefulFeatureFunction.h"
-#include "moses/FF/FFState.h"
-#include "moses/Util.h"
-#include "moses/ChartHypothesis.h"
-
-// lbl stuff
-#include "corpus/corpus.h"
-#include "lbl/lbl_features.h"
-#include "lbl/model.h"
-#include "lbl/process_identifier.h"
-#include "lbl/query_cache.h"
-
-#include "lbl/cdec_lbl_mapper.h"
-#include "lbl/cdec_rule_converter.h"
-#include "lbl/cdec_state_converter.h"
-
-#include "oxlm/Mapper.h"
-
-namespace Moses
-{
-
-class LBLLMState : public FFState
-{
-  std::vector<int> m_left, m_right;
-public:
-  LBLLMState()
-  {}
-
-  LBLLMState(const std::vector<int> &left, const std::vector<int> &right)
-  :m_left(left)
-  ,m_right(right)
-  {}
-
-  int Compare(const FFState& other) const;
-};
-
-
-// FF class
-template<class Model>
-class LBLLM : public StatefulFeatureFunction
-{
-public:
-	LBLLM(const std::string &line)
-	:StatefulFeatureFunction(2, line)
-	,m_order(5)
-	{
-	  ReadParameters();
-	}
-
-  void Load()
-  {
-    model.load(m_path);
-
-    config = model.getConfig();
-    int context_width = config->ngram_order - 1;
-    // For each state, we store at most context_width word ids to the left and
-    // to the right and a kSTAR separator. The last bit represents the actual
-    // size of the state.
-    //int max_state_size = (2 * context_width + 1) * sizeof(int) + 1;
-    //FeatureFunction::SetStateSize(max_state_size);
-
-    dict = model.getDict();
-    mapper = boost::make_shared<OXLMMapper>(dict);
-    //stateConverter = boost::make_shared<CdecStateConverter>(max_state_size - 1);
-    //ruleConverter = boost::make_shared<CdecRuleConverter>(mapper, stateConverter);
-
-    kSTART = dict.Convert("<s>");
-    kSTOP = dict.Convert("</s>");
-    kUNKNOWN = dict.Convert("<unk>");
-    kSTAR = dict.Convert("<{STAR}>");
-  }
-
-  bool IsUseable(const FactorMask &mask) const {
-    return true;
-  }
-  virtual const FFState* EmptyHypothesisState(const InputType &input) const {
-    return new LBLLMState();
-  }
-
-  void EvaluateInIsolation(const Phrase &source
-                , const TargetPhrase &targetPhrase
-                , ScoreComponentCollection &scoreBreakdown
-                , ScoreComponentCollection &estimatedFutureScore) const
-  {
-
-  }
-
-  void EvaluateWithSourceContext(const InputType &input
-                , const InputPath &inputPath
-                , const TargetPhrase &targetPhrase
-                , const StackVec *stackVec
-                , ScoreComponentCollection &scoreBreakdown
-                , ScoreComponentCollection *estimatedFutureScore = NULL) const
-  {
-
-  }
-
-  FFState* EvaluateWhenApplied(
-    const Hypothesis& cur_hypo,
-    const FFState* prev_state,
-    ScoreComponentCollection* accumulator) const
-  {
-
-  }
-
-  FFState* EvaluateWhenApplied(
-    const ChartHypothesis &hypo,
-    int featureID,
-    ScoreComponentCollection* accumulator) const
-  {
-	  /*
-	  std::vector<int> leftIds, rightIds;
-	  Phrase leftPhrase, rightPhrase;
-	  hypo.GetOutputPhrase(1, m_order, leftPhrase);
-	  hypo.GetOutputPhrase(2, m_order, rightPhrase);
-
-	  leftIds = mapper->convert(leftPhrase);
-	  rightIds = mapper->convert(rightPhrase);
-
-	  LBLFeatures leftScores = scoreFullContexts(leftIds);
-	  LBLFeatures rightScores = scoreFullContexts(rightIds);
-
-	  std::vector<float> scores(2);
-	  scores[0] = leftScores.LMScore + rightScores.LMScore;
-	  scores[1] = leftScores.OOVScore + rightScores.OOVScore;
-
-	  accumulator->PlusEquals(this, scores);
-
-	  LBLLMState *state = new LBLLMState(leftIds, rightIds);
-	  return state;
-	*/
-
-	  // baseline non-optimized scoring
-	  Phrase phrase;
-	  hypo.GetOutputPhrase(phrase);
-	  std::cerr << "phrase=" << phrase << std::endl;
-
-	  std::vector<int> ids;
-	  ids = mapper->convert(phrase);
-
-	  LBLFeatures leftScores = scoreFullContexts(ids);
-	  std::vector<float> scores(2);
-	  scores[0] = leftScores.LMScore;
-	  scores[1] = leftScores.OOVScore;
-
-	  accumulator->Assign(this, scores);
-
-	  LBLLMState *state = new LBLLMState();
-	  return state;
-
-  }
-
-  void SetParameter(const std::string& key, const std::string& value)
-  {
-    if (key == "path") {
-  	  m_path = value;
-    }
-    else if (key == "order") {
-      m_order = Scan<int>(value);
-    }
-    else {
-      StatefulFeatureFunction::SetParameter(key, value);
-    }
-  }
-
-
-protected:
-  std::string m_path;
-  int m_order;
-
-  int fid;
-  int fidOOV;
-  oxlm::Dict dict;
-  boost::shared_ptr<oxlm::ModelData> config;
-  Model model;
-
-  boost::shared_ptr<OXLMMapper> mapper;
-  /*
-  boost::shared_ptr<oxlm::CdecRuleConverter> ruleConverter;
-  boost::shared_ptr<oxlm::CdecStateConverter> stateConverter;
-  */
-
-  int kSTART;
-  int kSTOP;
-  int kUNKNOWN;
-  int kSTAR;
-
-  ////////////////////////////////////
-  LBLFeatures scoreFullContexts(const vector<int>& symbols) const {
-    LBLFeatures ret;
-    int last_star = -1;
-    int context_width = config->ngram_order - 1;
-    for (size_t i = 0; i < symbols.size(); ++i) {
-      if (symbols[i] == kSTAR) {
-        last_star = i;
-      } else if (i - last_star > context_width) {
-        ret += scoreContext(symbols, i);
-      }
-    }
-
-    return ret;
-  }
-
-  LBLFeatures scoreContext(const vector<int>& symbols, int position) const {
-    int word = symbols[position];
-    int context_width = config->ngram_order - 1;
-    vector<int> context;
-    for (int i = 1; i <= context_width && position - i >= 0; ++i) {
-      assert(symbols[position - i] != kSTAR);
-      context.push_back(symbols[position - i]);
-    }
-
-    if (!context.empty() && context.back() == kSTART) {
-      context.resize(context_width, kSTART);
-    } else {
-      context.resize(context_width, kUNKNOWN);
-    }
-
-    double score;
-    score = model.predict(word, context);
-    return LBLFeatures(score, word == kUNKNOWN);
-  }
-
-};
-
-
-}
-
diff --git a/moses/LM/oxlm/LBLLM2.h b/moses/LM/oxlm/LBLLM2.h
index 4026b28c6b..d485aa80ba 100644
--- a/moses/LM/oxlm/LBLLM2.h
+++ b/moses/LM/oxlm/LBLLM2.h
@@ -63,7 +63,6 @@ class LBLLM2 : public LanguageModelSingleFactor
     kSTART = dict.Convert("<s>");
     kSTOP = dict.Convert("</s>");
     kUNKNOWN = dict.Convert("<unk>");
-    kSTAR = dict.Convert("<{STAR}>");
   }
 
 
@@ -85,8 +84,10 @@ class LBLLM2 : public LanguageModelSingleFactor
     double score;
     score = model.predict(word, context);
 
+    /*
 	std::string str = DebugContextFactor(contextFactor);
     std::cerr << "contextFactor=" << str << " " << score << std::endl;
+	*/
 
     LMResult ret;
     ret.score = score;
@@ -112,7 +113,6 @@ class LBLLM2 : public LanguageModelSingleFactor
   int kSTART;
   int kSTOP;
   int kUNKNOWN;
-  int kSTAR;
 
   boost::shared_ptr<OXLMMapper> mapper;
 

From d3d2bc675f61ba8206803259950043b642775a01 Mon Sep 17 00:00:00 2001
From: Hieu Hoang <hieuhoang@gmail.com>
Date: Wed, 16 Jul 2014 08:17:24 +0100
Subject: [PATCH 58/84] compile error. /Pierre Lison

---
 mira/Main.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mira/Main.cpp b/mira/Main.cpp
index c22a80ecea..70b5971c9a 100644
--- a/mira/Main.cpp
+++ b/mira/Main.cpp
@@ -665,7 +665,7 @@ int main(int argc, char** argv)
     }
 
     // number of weight dumps this epoch
-    // size_t weightMixingThisEpoch = 0;
+    size_t weightMixingThisEpoch = 0;
     size_t weightEpochDump = 0;
 
     size_t shardPosition = 0;

From 69c27a3020430990c84941e6483966b71f88b5d2 Mon Sep 17 00:00:00 2001
From: Hieu Hoang <hieuhoang@gmail.com>
Date: Wed, 16 Jul 2014 13:21:51 +0100
Subject: [PATCH 59/84] name change

---
 contrib/other-builds/moses/.project     | 8 ++++----
 moses/FF/Factory.cpp                    | 8 ++++----
 moses/LM/Jamfile                        | 4 ++--
 moses/LM/oxlm/{LBLLM2.cpp => LBLLM.cpp} | 2 +-
 moses/LM/oxlm/{LBLLM2.h => LBLLM.h}     | 6 +++---
 5 files changed, 14 insertions(+), 14 deletions(-)
 rename moses/LM/oxlm/{LBLLM2.cpp => LBLLM.cpp} (70%)
 rename moses/LM/oxlm/{LBLLM2.h => LBLLM.h} (96%)

diff --git a/contrib/other-builds/moses/.project b/contrib/other-builds/moses/.project
index be2574de77..da1e3c466b 100644
--- a/contrib/other-builds/moses/.project
+++ b/contrib/other-builds/moses/.project
@@ -1972,14 +1972,14 @@
 			<locationURI>PARENT-3-PROJECT_LOC/moses/FF/OSM-Feature/osmHyp.h</locationURI>
 		</link>
 		<link>
-			<name>LM/oxlm/LBLLM2.cpp</name>
+			<name>LM/oxlm/LBLLM.cpp</name>
 			<type>1</type>
-			<locationURI>PARENT-3-PROJECT_LOC/moses/LM/oxlm/LBLLM2.cpp</locationURI>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/LM/oxlm/LBLLM.cpp</locationURI>
 		</link>
 		<link>
-			<name>LM/oxlm/LBLLM2.h</name>
+			<name>LM/oxlm/LBLLM.h</name>
 			<type>1</type>
-			<locationURI>PARENT-3-PROJECT_LOC/moses/LM/oxlm/LBLLM2.h</locationURI>
+			<locationURI>PARENT-3-PROJECT_LOC/moses/LM/oxlm/LBLLM.h</locationURI>
 		</link>
 		<link>
 			<name>LM/oxlm/Mapper.cpp</name>
diff --git a/moses/FF/Factory.cpp b/moses/FF/Factory.cpp
index 0472ce1d3c..7eb53a40ae 100644
--- a/moses/FF/Factory.cpp
+++ b/moses/FF/Factory.cpp
@@ -94,7 +94,7 @@
 #endif
 
 #ifdef LM_LBL
-#include "moses/LM/oxlm/LBLLM2.h"
+#include "moses/LM/oxlm/LBLLM.h"
 #endif
 
 #include "util/exception.hh"
@@ -246,9 +246,9 @@ FeatureRegistry::FeatureRegistry()
   MOSES_FNAME2("DALM", LanguageModelDALM);
 #endif
 #ifdef LM_LBL
-  MOSES_FNAME2("LBLLM-LM", LBLLM2<oxlm::LM>);
-  MOSES_FNAME2("LBLLM-FactoredLM", LBLLM2<oxlm::FactoredLM>);
-  MOSES_FNAME2("LBLLM-FactoredMaxentLM", LBLLM2<oxlm::FactoredMaxentLM>);
+  MOSES_FNAME2("LBLLM-LM", LBLLM<oxlm::LM>);
+  MOSES_FNAME2("LBLLM-FactoredLM", LBLLM<oxlm::FactoredLM>);
+  MOSES_FNAME2("LBLLM-FactoredMaxentLM", LBLLM<oxlm::FactoredMaxentLM>);
 #endif
 
   Add("KENLM", new KenFactory());
diff --git a/moses/LM/Jamfile b/moses/LM/Jamfile
index 8b0bc5af4a..87b0c1b36d 100644
--- a/moses/LM/Jamfile
+++ b/moses/LM/Jamfile
@@ -94,9 +94,9 @@ if $(with-nplm) {
 local with-lbllm = [ option.get "with-lbllm" ] ;
 if $(with-lbllm) {
   lib lbl : : <search>$(with-lbllm)/lib <search>$(with-lbllm)/lib64 ;
-  obj LBLLM2.o : oxlm/LBLLM2.cpp lbl ..//headers : <include>$(with-lbllm)/src <include>$(with-lbllm)/3rdparty/eigen-3 ;
+  obj LBLLM.o : oxlm/LBLLM.cpp lbl ..//headers : <include>$(with-lbllm)/src <include>$(with-lbllm)/3rdparty/eigen-3 ;
   obj Mapper.o : oxlm/Mapper.cpp lbl ..//headers : <include>$(with-lbllm)/src <include>$(with-lbllm)/3rdparty/eigen-3 ;
-  alias lbllm : LBLLM2.o Mapper.o lbl : : : <cxxflags>-std=c++0x <define>LM_LBL ;
+  alias lbllm : LBLLM.o Mapper.o lbl : : : <cxxflags>-std=c++0x <define>LM_LBL ;
   dependencies += lbllm ;
   lmmacros += LM_LBL ;
 }
diff --git a/moses/LM/oxlm/LBLLM2.cpp b/moses/LM/oxlm/LBLLM.cpp
similarity index 70%
rename from moses/LM/oxlm/LBLLM2.cpp
rename to moses/LM/oxlm/LBLLM.cpp
index 34dc526310..20f1a21496 100644
--- a/moses/LM/oxlm/LBLLM2.cpp
+++ b/moses/LM/oxlm/LBLLM.cpp
@@ -1,5 +1,5 @@
 
-#include "LBLLM2.h"
+#include "LBLLM.h"
 
 using namespace std;
 
diff --git a/moses/LM/oxlm/LBLLM2.h b/moses/LM/oxlm/LBLLM.h
similarity index 96%
rename from moses/LM/oxlm/LBLLM2.h
rename to moses/LM/oxlm/LBLLM.h
index d485aa80ba..07ed9a8d3d 100644
--- a/moses/LM/oxlm/LBLLM2.h
+++ b/moses/LM/oxlm/LBLLM.h
@@ -20,12 +20,12 @@ namespace Moses
 
 
 template<class Model>
-class LBLLM2 : public LanguageModelSingleFactor
+class LBLLM : public LanguageModelSingleFactor
 {
 protected:
 
 public:
-	LBLLM2(const std::string &line)
+	LBLLM(const std::string &line)
 	:LanguageModelSingleFactor(line)
 	{
 		ReadParameters();
@@ -40,7 +40,7 @@ class LBLLM2 : public LanguageModelSingleFactor
 		m_sentenceEndWord[m_factorType] = m_sentenceEnd;
 	}
 
-  ~LBLLM2()
+  ~LBLLM()
   {}
 
   void Load()

From 8ef136a57cf5299c59d66ca9d9b0be34c46f0b16 Mon Sep 17 00:00:00 2001
From: Hieu Hoang <hieuhoang@gmail.com>
Date: Wed, 16 Jul 2014 14:18:12 +0100
Subject: [PATCH 60/84] exclude oxlm from Eclipse build

---
 contrib/other-builds/CreateOnDiskPt/.cproject |   4 -
 .../other-builds/moses-chart-cmd/.cproject    |   4 -
 contrib/other-builds/moses/.cproject          | 108 +++++++++++++++++-
 3 files changed, 103 insertions(+), 13 deletions(-)

diff --git a/contrib/other-builds/CreateOnDiskPt/.cproject b/contrib/other-builds/CreateOnDiskPt/.cproject
index e114255db2..4c46d70f86 100644
--- a/contrib/other-builds/CreateOnDiskPt/.cproject
+++ b/contrib/other-builds/CreateOnDiskPt/.cproject
@@ -42,9 +42,6 @@
 								<option id="gnu.cpp.link.option.libs.1325292383" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
 									<listOptionValue builtIn="false" value="OnDiskPt"/>
 									<listOptionValue builtIn="false" value="moses"/>
-									<listOptionValue builtIn="false" value="lbl"/>
-									<listOptionValue builtIn="false" value="murmurhash"/>
-									<listOptionValue builtIn="false" value="gomp"/>
 									<listOptionValue builtIn="false" value="irstlm"/>
 									<listOptionValue builtIn="false" value="dstruct"/>
 									<listOptionValue builtIn="false" value="flm"/>
@@ -67,7 +64,6 @@
 								</option>
 								<option id="gnu.cpp.link.option.paths.815001500" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
-									<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../oxlm/lib&quot;"/>
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/search/Debug&quot;"/>
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../DALM/lib&quot;"/>
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../srilm/lib/i686-m64&quot;"/>
diff --git a/contrib/other-builds/moses-chart-cmd/.cproject b/contrib/other-builds/moses-chart-cmd/.cproject
index 848329d58e..0d720dbc27 100644
--- a/contrib/other-builds/moses-chart-cmd/.cproject
+++ b/contrib/other-builds/moses-chart-cmd/.cproject
@@ -48,7 +48,6 @@
 							<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.816413868" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug">
 								<option id="gnu.cpp.link.option.paths.330225535" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../nplm/lib&quot;"/>
-									<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../oxlm/lib&quot;"/>
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../probingPT/helpers&quot;"/>
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../DALM/lib&quot;"/>
@@ -69,9 +68,6 @@
 								</option>
 								<option id="gnu.cpp.link.option.libs.1177721357" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
 									<listOptionValue builtIn="false" value="moses"/>
-									<listOptionValue builtIn="false" value="lbl"/>
-									<listOptionValue builtIn="false" value="murmurhash"/>
-									<listOptionValue builtIn="false" value="gomp"/>
 									<listOptionValue builtIn="false" value="irstlm"/>
 									<listOptionValue builtIn="false" value="dstruct"/>
 									<listOptionValue builtIn="false" value="dalm"/>
diff --git a/contrib/other-builds/moses/.cproject b/contrib/other-builds/moses/.cproject
index 69681a1367..290e7527cb 100644
--- a/contrib/other-builds/moses/.cproject
+++ b/contrib/other-builds/moses/.cproject
@@ -36,13 +36,10 @@
 									<listOptionValue builtIn="false" value="/opt/local/include/"/>
 									<listOptionValue builtIn="false" value="${workspace_loc}/../../irstlm/include"/>
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../nplm/src&quot;"/>
-									<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../eigen&quot;"/>
 									<listOptionValue builtIn="false" value="${workspace_loc}/../../srilm/include"/>
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../DALM/include&quot;"/>
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../DALM/darts-clone&quot;"/>
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../randlm/include/RandLM&quot;"/>
-									<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../eigen-3&quot;"/>
-									<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../oxlm/src&quot;"/>
 									<listOptionValue builtIn="false" value="${workspace_loc}/../../"/>
 								</option>
 								<option id="gnu.cpp.compiler.option.preprocessor.def.752586397" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
@@ -56,7 +53,6 @@
 									<listOptionValue builtIn="false" value="LM_IRST"/>
 									<listOptionValue builtIn="false" value="LM_DALM"/>
 									<listOptionValue builtIn="false" value="LM_NPLM"/>
-									<listOptionValue builtIn="false" value="LM_LBL"/>
 									<listOptionValue builtIn="false" value="_FILE_OFFSET_BIT=64"/>
 									<listOptionValue builtIn="false" value="_LARGE_FILES"/>
 								</option>
@@ -83,8 +79,18 @@
 					<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.511477442" name="Rand.h" rcbsApplicability="disable" resourcePath="LM/Rand.h" toolsToInvoke=""/>
 					<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.1211280539" name="DALMWrapper.h" rcbsApplicability="disable" resourcePath="LM/DALMWrapper.h" toolsToInvoke=""/>
 					<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.790052015" name="IRST.h" rcbsApplicability="disable" resourcePath="LM/IRST.h" toolsToInvoke=""/>
+					<folderInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.2147044266" name="/" resourcePath="LM/oxlm">
+						<toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.369597529" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.debug" unusedChildren="">
+							<tool id="cdt.managedbuild.tool.gnu.archiver.base.1919082719" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base.1976472988"/>
+							<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.451148054" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1774992327"/>
+							<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.1343615699" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.2126314903"/>
+							<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.1929750597" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug.1168585173"/>
+							<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.1235749054" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.2074660557"/>
+							<tool id="cdt.managedbuild.tool.gnu.assembler.exe.debug.8387388" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.debug.933467113"/>
+						</toolChain>
+					</folderInfo>
 					<sourceEntries>
-						<entry excluding="LM/Rand.h|LM/Rand.cpp|TranslationModel/CompactPT|LM/NeuralLMWrapper.cpp|FF/PhraseLengthFeatureTest.cpp|PhraseLengthFeatureTest.cpp|LM/BackwardTest.cpp|LM/BackwardLMState.h|LM/BackwardLMState.cpp|LM/Backward.h|LM/Backward.cpp|FeatureVectorTest.cpp|LM/ParallelBackoff.h|LM/ParallelBackoff.cpp|src/SyntacticLanguageModelState.h|src/SyntacticLanguageModelFiles.h|src/SyntacticLanguageModel.h|src/SyntacticLanguageModel.cpp|src/LM/SRI.h|src/LM/SRI.cpp|src/LM/Rand.h|src/LM/Rand.cpp|src/LM/LDHT.h|src/LM/LDHT.cpp|SyntacticLanguageModelState.h|SyntacticLanguageModelFiles.h|SyntacticLanguageModel.h|SyntacticLanguageModel.cpp|LM/LDHT.h|LM/LDHT.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
+						<entry excluding="LM/oxlm|LM/Rand.h|LM/Rand.cpp|TranslationModel/CompactPT|LM/NeuralLMWrapper.cpp|FF/PhraseLengthFeatureTest.cpp|PhraseLengthFeatureTest.cpp|LM/BackwardTest.cpp|LM/BackwardLMState.h|LM/BackwardLMState.cpp|LM/Backward.h|LM/Backward.cpp|FeatureVectorTest.cpp|LM/ParallelBackoff.h|LM/ParallelBackoff.cpp|src/SyntacticLanguageModelState.h|src/SyntacticLanguageModelFiles.h|src/SyntacticLanguageModel.h|src/SyntacticLanguageModel.cpp|src/LM/SRI.h|src/LM/SRI.cpp|src/LM/Rand.h|src/LM/Rand.cpp|src/LM/LDHT.h|src/LM/LDHT.cpp|SyntacticLanguageModelState.h|SyntacticLanguageModelFiles.h|SyntacticLanguageModel.h|SyntacticLanguageModel.cpp|LM/LDHT.h|LM/LDHT.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
 					</sourceEntries>
 				</configuration>
 			</storageModule>
@@ -136,6 +142,95 @@
 			</storageModule>
 			<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
 		</cconfiguration>
+		<cconfiguration id="cdt.managedbuild.config.gnu.exe.debug.656913512.916939380">
+			<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.656913512.916939380" moduleId="org.eclipse.cdt.core.settings" name="Debug with oxlm">
+				<externalSettings>
+					<externalSetting>
+						<entry flags="VALUE_WORKSPACE_PATH" kind="includePath" name="/moses"/>
+						<entry flags="VALUE_WORKSPACE_PATH" kind="libraryPath" name="/moses/Debug with oxlm"/>
+						<entry flags="RESOLVED" kind="libraryFile" name="moses" srcPrefixMapping="" srcRootPath=""/>
+					</externalSetting>
+				</externalSettings>
+				<extensions>
+					<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
+					<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
+					<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
+				</extensions>
+			</storageModule>
+			<storageModule moduleId="cdtBuildSystem" version="4.0.0">
+				<configuration artifactExtension="a" artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.staticLib" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.staticLib" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.656913512.916939380" name="Debug with oxlm" parent="cdt.managedbuild.config.gnu.exe.debug">
+					<folderInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.916939380." name="/" resourcePath="">
+						<toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.58016517" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.debug">
+							<targetPlatform binaryParser="org.eclipse.cdt.core.ELF;org.eclipse.cdt.core.MachO64" id="cdt.managedbuild.target.gnu.platform.exe.debug.1519676809" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.debug"/>
+							<builder buildPath="${workspace_loc:/moses/Debug}" id="cdt.managedbuild.target.gnu.builder.exe.debug.210713286" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="cdt.managedbuild.target.gnu.builder.exe.debug"/>
+							<tool id="cdt.managedbuild.tool.gnu.archiver.base.352461864" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
+							<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1930334119" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug">
+								<option id="gnu.cpp.compiler.exe.debug.option.optimization.level.287782778" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
+								<option id="gnu.cpp.compiler.exe.debug.option.debugging.level.1878892542" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
+								<option id="gnu.cpp.compiler.option.include.paths.1954109101" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../probingPT/helpers&quot;"/>
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../cmph/include&quot;"/>
+									<listOptionValue builtIn="false" value="/opt/local/include/"/>
+									<listOptionValue builtIn="false" value="${workspace_loc}/../../irstlm/include"/>
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../nplm/src&quot;"/>
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../eigen&quot;"/>
+									<listOptionValue builtIn="false" value="${workspace_loc}/../../srilm/include"/>
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../DALM/include&quot;"/>
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../DALM/darts-clone&quot;"/>
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../randlm/include/RandLM&quot;"/>
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../eigen-3&quot;"/>
+									<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../oxlm/src&quot;"/>
+									<listOptionValue builtIn="false" value="${workspace_loc}/../../"/>
+								</option>
+								<option id="gnu.cpp.compiler.option.preprocessor.def.274233516" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
+									<listOptionValue builtIn="false" value="IS_ECLIPSE"/>
+									<listOptionValue builtIn="false" value="HAVE_PROBINGPT"/>
+									<listOptionValue builtIn="false" value="HAVE_BOOST"/>
+									<listOptionValue builtIn="false" value="MAX_NUM_FACTORS=4"/>
+									<listOptionValue builtIn="false" value="WITH_THREADS"/>
+									<listOptionValue builtIn="false" value="KENLM_MAX_ORDER=7"/>
+									<listOptionValue builtIn="false" value="TRACE_ENABLE"/>
+									<listOptionValue builtIn="false" value="LM_IRST"/>
+									<listOptionValue builtIn="false" value="LM_DALM"/>
+									<listOptionValue builtIn="false" value="LM_NPLM"/>
+									<listOptionValue builtIn="false" value="LM_LBL"/>
+									<listOptionValue builtIn="false" value="_FILE_OFFSET_BITS=64"/>
+									<listOptionValue builtIn="false" value="_LARGE_FILES"/>
+								</option>
+								<option id="gnu.cpp.compiler.option.dialect.std.1353163586" name="Language standard" superClass="gnu.cpp.compiler.option.dialect.std" value="gnu.cpp.compiler.dialect.c++11" valueType="enumerated"/>
+								<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1413141770" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.1040012873" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug">
+								<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.260276259" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" valueType="enumerated"/>
+								<option id="gnu.c.compiler.exe.debug.option.debugging.level.744208673" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+								<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.699852884" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.1114065632" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug"/>
+							<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.373508964" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug">
+								<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1873470979" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
+									<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
+									<additionalInput kind="additionalinput" paths="$(LIBS)"/>
+								</inputType>
+							</tool>
+							<tool id="cdt.managedbuild.tool.gnu.assembler.exe.debug.1046426871" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.debug">
+								<inputType id="cdt.managedbuild.tool.gnu.assembler.input.1482215763" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
+							</tool>
+						</toolChain>
+					</folderInfo>
+					<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.916939380.LM/Rand.h" name="Rand.h" rcbsApplicability="disable" resourcePath="LM/Rand.h" toolsToInvoke=""/>
+					<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.916939380.LM/IRST.h" name="IRST.h" rcbsApplicability="disable" resourcePath="LM/IRST.h" toolsToInvoke=""/>
+					<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.916939380.LM/DALMWrapper.h" name="DALMWrapper.h" rcbsApplicability="disable" resourcePath="LM/DALMWrapper.h" toolsToInvoke=""/>
+					<sourceEntries>
+						<entry excluding="LM/Rand.h|LM/Rand.cpp|TranslationModel/CompactPT|LM/NeuralLMWrapper.cpp|FF/PhraseLengthFeatureTest.cpp|PhraseLengthFeatureTest.cpp|LM/BackwardTest.cpp|LM/BackwardLMState.h|LM/BackwardLMState.cpp|LM/Backward.h|LM/Backward.cpp|FeatureVectorTest.cpp|LM/ParallelBackoff.h|LM/ParallelBackoff.cpp|src/SyntacticLanguageModelState.h|src/SyntacticLanguageModelFiles.h|src/SyntacticLanguageModel.h|src/SyntacticLanguageModel.cpp|src/LM/SRI.h|src/LM/SRI.cpp|src/LM/Rand.h|src/LM/Rand.cpp|src/LM/LDHT.h|src/LM/LDHT.cpp|SyntacticLanguageModelState.h|SyntacticLanguageModelFiles.h|SyntacticLanguageModel.h|SyntacticLanguageModel.cpp|LM/LDHT.h|LM/LDHT.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
+					</sourceEntries>
+				</configuration>
+			</storageModule>
+			<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
+		</cconfiguration>
 	</storageModule>
 	<storageModule moduleId="cdtBuildSystem" version="4.0.0">
 		<project id="moses.cdt.managedbuild.target.gnu.exe.1375079569" name="Executable" projectType="cdt.managedbuild.target.gnu.exe"/>
@@ -174,6 +269,9 @@
 		<configuration configurationName="Debug">
 			<resource resourceType="PROJECT" workspacePath="/moses"/>
 		</configuration>
+		<configuration configurationName="Debug with oxlm">
+			<resource resourceType="PROJECT" workspacePath="/moses"/>
+		</configuration>
 	</storageModule>
 	<storageModule moduleId="org.eclipse.cdt.make.core.buildtargets"/>
 	<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>

From 0d8d77e3da4d341810125b465dcdd033556f2b96 Mon Sep 17 00:00:00 2001
From: Rico Sennrich <rico.sennrich@gmx.ch>
Date: Thu, 17 Jul 2014 16:10:30 +0100
Subject: [PATCH 61/84] add nplm wrapper from Kenneth's kenlm repo

---
 lm/wrappers/README  |  3 ++
 lm/wrappers/nplm.cc | 90 +++++++++++++++++++++++++++++++++++++++++++++
 lm/wrappers/nplm.hh | 83 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 176 insertions(+)
 create mode 100644 lm/wrappers/README
 create mode 100644 lm/wrappers/nplm.cc
 create mode 100644 lm/wrappers/nplm.hh

diff --git a/lm/wrappers/README b/lm/wrappers/README
new file mode 100644
index 0000000000..56c34c23e1
--- /dev/null
+++ b/lm/wrappers/README
@@ -0,0 +1,3 @@
+This directory is for wrappers around other people's LMs, presenting an interface similar to KenLM's.  You will need to have their LM installed.
+
+NPLM is a work in progress.  
diff --git a/lm/wrappers/nplm.cc b/lm/wrappers/nplm.cc
new file mode 100644
index 0000000000..70622bd2bc
--- /dev/null
+++ b/lm/wrappers/nplm.cc
@@ -0,0 +1,90 @@
+#include "lm/wrappers/nplm.hh"
+#include "util/exception.hh"
+#include "util/file.hh"
+
+#include <algorithm>
+
+#include <string.h>
+
+#include "neuralLM.h"
+
+namespace lm {
+namespace np {
+
+Vocabulary::Vocabulary(const nplm::vocabulary &vocab) 
+  : base::Vocabulary(vocab.lookup_word("<s>"), vocab.lookup_word("</s>"), vocab.lookup_word("<unk>")),
+    vocab_(vocab), null_word_(vocab.lookup_word("<null>")) {}
+
+Vocabulary::~Vocabulary() {}
+
+WordIndex Vocabulary::Index(const std::string &str) const {
+  return vocab_.lookup_word(str);
+}
+
+bool Model::Recognize(const std::string &name) {  // true iff the first 16 bytes of the file equal the magic string below
+  try {
+    util::scoped_fd file(util::OpenReadOrThrow(name.c_str()));
+    char magic_check[16];
+    util::ReadOrThrow(file.get(), magic_check, sizeof(magic_check));
+    const char nnlm_magic[] = "\\config\nversion ";  // 16 characters, not counting the terminating NUL
+    return !memcmp(magic_check, nnlm_magic, 16);
+  } catch (const util::Exception &) {  // any util::Exception from the open/read above means "not recognized"
+    return false;
+  }
+} 
+
+Model::Model(const std::string &file, std::size_t cache)  // loads the NPLM from file; cache is applied to each per-thread copy in FullScore
+  : base_instance_(new nplm::neuralLM(file)), vocab_(base_instance_->get_vocabulary()), cache_size_(cache) {
+  UTIL_THROW_IF(base_instance_->get_order() > NPLM_MAX_ORDER, util::Exception, "This NPLM has order " << (unsigned int)base_instance_->get_order() << " but the KenLM wrapper was compiled with " << NPLM_MAX_ORDER << ".  Change the definition of NPLM_MAX_ORDER and recompile.");
+  // Use log base 10 so scores are compatible with backoff models.
+  base_instance_->set_log_base(10.0);
+  State begin_sentence, null_context;  // contexts handed to Init() below
+  std::fill(begin_sentence.words, begin_sentence.words + NPLM_MAX_ORDER - 1, base_instance_->lookup_word("<s>"));
+  null_word_ = base_instance_->lookup_word("<null>");
+  std::fill(null_context.words, null_context.words + NPLM_MAX_ORDER - 1, null_word_);
+
+  Init(begin_sentence, null_context, vocab_, base_instance_->get_order());
+}
+
+Model::~Model() {}
+
+FullScoreReturn Model::FullScore(const State &from, const WordIndex new_word, State &out_state) const {
+  nplm::neuralLM *lm = backend_.get();
+  if (!lm) {  // first call on this thread: copy the shared instance and give the copy its cache
+    lm = new nplm::neuralLM(*base_instance_);
+    backend_.reset(lm);
+    lm->set_cache(cache_size_);
+  }
+  // State is in natural word order.
+  FullScoreReturn ret;
+  for (int i = 0; i < lm->get_order() - 1; ++i) {
+    lm->staging_ngram()(i) = from.words[i];
+  }
+  lm->staging_ngram()(lm->get_order() - 1) = new_word;
+  ret.prob = lm->lookup_from_staging();
+  // Always say full order.
+  ret.ngram_length = lm->get_order();
+  // Shift everything down by one.  memmove, not memcpy: the ranges overlap if out_state aliases from.
+  memmove(out_state.words, from.words + 1, sizeof(WordIndex) * (lm->get_order() - 2));
+  out_state.words[lm->get_order() - 2] = new_word;
+  // Fill in trailing words with zeros so state comparison works.
+  memset(out_state.words + lm->get_order() - 1, 0, sizeof(WordIndex) * (NPLM_MAX_ORDER - lm->get_order()));
+  return ret;
+}
+
+// TODO: optimize with direct call?
+FullScoreReturn Model::FullScoreForgotState(const WordIndex *context_rbegin, const WordIndex *context_rend, const WordIndex new_word, State &out_state) const {
+  // State is in natural word order.  The API here specifies reverse order.
+  std::size_t state_length = std::min<std::size_t>(Order() - 1, context_rend - context_rbegin);  // use at most Order() - 1 context words
+  State state;
+  // Pad the front with null words when fewer than Order() - 1 context words were supplied.
+  for (lm::WordIndex *i = state.words; i < state.words + Order() - 1 - state_length; ++i) {
+    *i = null_word_;
+  }
+  // Reverse the supplied context into the slots after the padding, ending at index Order() - 2.
+  std::reverse_copy(context_rbegin, context_rbegin + state_length, state.words + Order() - 1 - state_length);
+  return FullScore(state, new_word, out_state);
+}
+
+} // namespace np
+} // namespace lm
diff --git a/lm/wrappers/nplm.hh b/lm/wrappers/nplm.hh
new file mode 100644
index 0000000000..b7dd4a21e9
--- /dev/null
+++ b/lm/wrappers/nplm.hh
@@ -0,0 +1,83 @@
+#ifndef LM_WRAPPERS_NPLM_H
+#define LM_WRAPPERS_NPLM_H
+
+#include "lm/facade.hh"
+#include "lm/max_order.hh"
+#include "util/string_piece.hh"
+
+#include <boost/thread/tss.hpp>
+#include <boost/scoped_ptr.hpp>
+
+/* Wrapper to NPLM "by Ashish Vaswani, with contributions from David Chiang
+ * and Victoria Fossum."  
+ * http://nlg.isi.edu/software/nplm/
+ */
+
+namespace nplm {
+class vocabulary;
+class neuralLM;
+} // namespace nplm
+
+namespace lm {
+namespace np {
+
+class Vocabulary : public base::Vocabulary {
+  public:
+    Vocabulary(const nplm::vocabulary &vocab);
+
+    ~Vocabulary();
+    // Looks the word up in the wrapped nplm::vocabulary.
+    WordIndex Index(const std::string &str) const;
+
+    // TODO: lobby them to support StringPiece
+    WordIndex Index(const StringPiece &str) const {
+      return Index(std::string(str.data(), str.size()));
+    }
+    // Id of "<null>" in the wrapped vocabulary, looked up once at construction.
+    lm::WordIndex NullWord() const { return null_word_; }
+
+  private:
+    const nplm::vocabulary &vocab_;  // held by reference, not copied: the nplm::vocabulary must outlive this object
+
+    const lm::WordIndex null_word_;
+};
+
+// Sorry for imposing my limitations on your code.
+#define NPLM_MAX_ORDER 7
+
+struct State {
+  WordIndex words[NPLM_MAX_ORDER - 1];
+};
+
+class Model : public lm::base::ModelFacade<Model, State, Vocabulary> {
+  private:
+    typedef lm::base::ModelFacade<Model, State, Vocabulary> P;
+
+  public:
+    // Does this look like an NPLM?  Checks the first 16 bytes of the file; false if it cannot be opened or read.
+    static bool Recognize(const std::string &file);
+    // Loads the model from file; cache_size is passed to set_cache() on each per-thread copy.
+    explicit Model(const std::string &file, std::size_t cache_size = 1 << 20);
+
+    ~Model();
+    // Scores new_word after the context in from; out_state receives that context shifted by one with new_word appended.
+    FullScoreReturn FullScore(const State &from, const WordIndex new_word, State &out_state) const;
+    // Same, but the context is a reversed word range; contexts shorter than Order() - 1 are padded with the null word.
+    FullScoreReturn FullScoreForgotState(const WordIndex *context_rbegin, const WordIndex *context_rend, const WordIndex new_word, State &out_state) const;
+
+  private:
+    boost::scoped_ptr<nplm::neuralLM> base_instance_;
+    // Per-thread copy of *base_instance_, created on a thread's first FullScore call.
+    mutable boost::thread_specific_ptr<nplm::neuralLM> backend_;
+
+    Vocabulary vocab_;
+    // Id of "<null>", set in the constructor and used to pad short contexts.
+    lm::WordIndex null_word_;
+
+    const std::size_t cache_size_;
+};
+
+} // namespace np
+} // namespace lm
+
+#endif // LM_WRAPPERS_NPLM_H

From eb5336ad9cf29ff32edebea6c28feb7b01007839 Mon Sep 17 00:00:00 2001
From: Rico Sennrich <rico.sennrich@gmx.ch>
Date: Thu, 17 Jul 2014 16:50:08 +0100
Subject: [PATCH 62/84] thread-safety, precomputation and caching for
 NeuralLMWrapper

doesn't work with default nplm, but with the fork at https://github.com/rsennrich/nplm
---
 lm/Jamfile                   |  2 +-
 moses/LM/Jamfile             |  2 +-
 moses/LM/NeuralLMWrapper.cpp | 52 ++++++++++++------------------------
 moses/LM/NeuralLMWrapper.h   | 10 ++++---
 4 files changed, 25 insertions(+), 41 deletions(-)

diff --git a/lm/Jamfile b/lm/Jamfile
index 4693f9e01b..6ca37c99e5 100644
--- a/lm/Jamfile
+++ b/lm/Jamfile
@@ -17,7 +17,7 @@ wrappers = ;
 local with-nplm = [ option.get "with-nplm" ] ;
 if $(with-nplm) {
   lib neuralLM : : <search>$(with-nplm)/src ;
-  obj nplm.o : wrappers/nplm.cc : <include>.. <include>$(with-nplm)/src <cxxflags>-fopenmp ;
+  obj nplm.o : wrappers/nplm.cc : <include>.. <include>$(with-nplm)/src <include>$(with-nplm)/3rdparty/eigen <cxxflags>-fopenmp ;
   alias nplm : nplm.o neuralLM ..//boost_thread : : : <cxxflags>-fopenmp <linkflags>-fopenmp <define>WITH_NPLM <library>..//boost_thread ;
   wrappers += nplm ;
 }
diff --git a/moses/LM/Jamfile b/moses/LM/Jamfile
index 87b0c1b36d..78fac90a01 100644
--- a/moses/LM/Jamfile
+++ b/moses/LM/Jamfile
@@ -84,7 +84,7 @@ if $(with-ldhtlm) {
 local with-nplm = [ option.get "with-nplm" ] ;
 if $(with-nplm) {
   lib neuralLM : : <search>$(with-nplm)/lib <search>$(with-nplm)/lib64 ;
-  obj NeuralLMWrapper.o : NeuralLMWrapper.cpp neuralLM ..//headers : <include>$(with-nplm)/src <include>$(with-nplm)/3rdparty/eigen-3.1.4 ;
+  obj NeuralLMWrapper.o : NeuralLMWrapper.cpp neuralLM ..//headers : <include>$(with-nplm)/src <include>$(with-nplm)/3rdparty/eigen ;
   alias nplm : NeuralLMWrapper.o neuralLM : : : <cxxflags>-fopenmp <linkflags>-fopenmp <define>LM_NEURAL ;
   dependencies += nplm ;
   lmmacros += LM_NEURAL ;
diff --git a/moses/LM/NeuralLMWrapper.cpp b/moses/LM/NeuralLMWrapper.cpp
index 467c41846c..9411bd2c45 100644
--- a/moses/LM/NeuralLMWrapper.cpp
+++ b/moses/LM/NeuralLMWrapper.cpp
@@ -1,6 +1,7 @@
 
 #include "moses/StaticData.h"
 #include "moses/FactorCollection.h"
+#include <boost/functional/hash.hpp>
 #include "NeuralLMWrapper.h"
 #include "neuralLM.h"
 #include <model.h>
@@ -12,21 +13,19 @@ namespace Moses
 NeuralLMWrapper::NeuralLMWrapper(const std::string &line)
 :LanguageModelSingleFactor(line)
 {
-  // This space intentionally left blank
+  ReadParameters();
 }
 
 
 NeuralLMWrapper::~NeuralLMWrapper()
 {
-  delete m_neuralLM;
+  delete m_neuralLM_shared;
 }
 
 
 void NeuralLMWrapper::Load()
 {
 
-  TRACE_ERR("Loading NeuralLM " << m_filePath << endl);
-
   // Set parameters required by ancestor classes
   FactorCollection &factorCollection = FactorCollection::Instance();
   m_sentenceStart = factorCollection.AddFactor(Output, m_factorType, BOS_);
@@ -34,59 +33,42 @@ void NeuralLMWrapper::Load()
   m_sentenceEnd		= factorCollection.AddFactor(Output, m_factorType, EOS_);
   m_sentenceEndWord[m_factorType] = m_sentenceEnd;
 
-  m_neuralLM = new nplm::neuralLM();
-  m_neuralLM->read(m_filePath);
-  m_neuralLM->set_log_base(10);
+  m_neuralLM_shared = new nplm::neuralLM(m_filePath, true);
+  m_neuralLM_shared->set_log_base(10);
+  //TODO: config option?
+  m_neuralLM_shared->set_cache(1000000);
+
+  UTIL_THROW_IF2(m_nGramOrder != m_neuralLM_shared->get_order(),
+                 "Wrong order of neuralLM: LM has " << m_neuralLM_shared->get_order() << ", but Moses expects " << m_nGramOrder);
 
-  //TODO: Implement this
 }
 
 
 LMResult NeuralLMWrapper::GetValue(const vector<const Word*> &contextFactor, State* finalState) const
 {
 
-  unsigned int hashCode = 0;
+  if (!m_neuralLM.get()) {
+    m_neuralLM.reset(new nplm::neuralLM(*m_neuralLM_shared));
+  }
+  size_t hashCode = 0;
+
   vector<int> words(contextFactor.size());
-//  TRACE_ERR("NeuralLM words:");
-  for (size_t i=0, n=contextFactor.size(); i<n; i+=1) {
+  for (size_t i=0, n=contextFactor.size(); i<n; i++) {
     const Word* word = contextFactor[i];
     const Factor* factor = word->GetFactor(m_factorType);
     const std::string string= factor->GetString().as_string();
     int neuralLM_wordID = m_neuralLM->lookup_word(string);
     words[i] = neuralLM_wordID;
-    hashCode += neuralLM_wordID;
-//    TRACE_ERR(" " << string << "(" << neuralLM_wordID << ")" );
+    boost::hash_combine(hashCode, neuralLM_wordID);
   }
 
   double value = m_neuralLM->lookup_ngram(words);
-//  TRACE_ERR("\t=\t" << value);
-//  TRACE_ERR(endl);
 
   // Create a new struct to hold the result
   LMResult ret;
   ret.score = value;
   ret.unknown = false;
 
-
-  // State* finalState is a void pointer
-  //
-  // Construct a hash value from the vector of words (contextFactor)
-  //
-  // The hash value must be the same size as sizeof(void*)
-  //
-  // TODO Set finalState to the above hash value
-
-  // use last word as state info
-//  const Factor *factor;
-//  size_t hash_value(const Factor &f);
-//  if (contextFactor.size()) {
-//    factor = contextFactor.back()->GetFactor(m_factorType);
-//  } else {
-//    factor = NULL;
-//  }
-//
-//  (*finalState) = (State*) factor;
-
   (*finalState) = (State*) hashCode;
 
   return ret;
diff --git a/moses/LM/NeuralLMWrapper.h b/moses/LM/NeuralLMWrapper.h
index 6a05aa09a1..7207605e18 100644
--- a/moses/LM/NeuralLMWrapper.h
+++ b/moses/LM/NeuralLMWrapper.h
@@ -2,6 +2,8 @@
 
 #include "SingleFactor.h"
 
+#include <boost/thread/tss.hpp>
+
 namespace nplm {
   class neuralLM;
 }
@@ -9,16 +11,16 @@ namespace nplm {
 namespace Moses
 {
 
-/** Implementation of single factor LM using IRST's code.
- */
 class NeuralLMWrapper : public LanguageModelSingleFactor
 {
 protected:
-  nplm::neuralLM *m_neuralLM;
+  // big data (vocab, weights, cache) shared among threads
+  nplm::neuralLM *m_neuralLM_shared;
+  // thread-specific nplm for thread-safety
+  mutable boost::thread_specific_ptr<nplm::neuralLM> m_neuralLM;
 
 public:
   NeuralLMWrapper(const std::string &line);
-  //  NeuralLM(const std::string &line);
   ~NeuralLMWrapper();
 
   virtual LMResult GetValue(const std::vector<const Word*> &contextFactor, State* finalState = 0) const;

From 19a5ef4a1a64fcc77741db7e900b2fa7294434e2 Mon Sep 17 00:00:00 2001
From: Matthias Huck <huck@i6.informatik.rwth-aachen.de>
Date: Thu, 17 Jul 2014 20:19:28 +0100
Subject: [PATCH 63/84] relax-parse: use cin.peek()

Hope this eliminates some weird behavior
---
 phrase-extract/relax-parse-main.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/phrase-extract/relax-parse-main.cpp b/phrase-extract/relax-parse-main.cpp
index e5feb94d0f..b415c4d0ef 100644
--- a/phrase-extract/relax-parse-main.cpp
+++ b/phrase-extract/relax-parse-main.cpp
@@ -31,15 +31,15 @@ int main(int argc, char* argv[])
 
   // loop through all sentences
   int i=0;
-  string inBuffer;
-  while(getline(cin, inBuffer)) {
+  string inBufferString;
+  while(cin.peek() != EOF) {
+    getline(cin,inBufferString);
     i++;
     if (i%1000 == 0) cerr << "." << flush;
     if (i%10000 == 0) cerr << ":" << flush;
     if (i%100000 == 0) cerr << "!" << flush;
 
     // process into syntax tree representation
-    string inBufferString = string( inBuffer );
     set< string > labelCollection;         // set of labels, not used
     map< string, int > topLabelCollection; // count of top labels, not used
     SyntaxTree tree;

From 3194c4783d5c7b02c76282f1daadf6de7d602dbb Mon Sep 17 00:00:00 2001
From: Ulrich Germann <ugermann@inf.ed.ac.uk>
Date: Thu, 17 Jul 2014 21:31:40 +0100
Subject: [PATCH 64/84] Setting DEFAULT_MAX_PHRASE_LENGTH to
 numeric_limits<size_t>::max() breaks the regression test for (deprecated)
 PhraseDictionaryDynSuffixArray, so I set it to 100,000 (to have virtually
 unlimited phrase length for suffix array-based phrase tables).

---
 moses/TypeDef.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/moses/TypeDef.h b/moses/TypeDef.h
index 7852d130d1..2f45f5e9ce 100644
--- a/moses/TypeDef.h
+++ b/moses/TypeDef.h
@@ -31,6 +31,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 #include <BaseTsd.h>
 #else
 #include <stdint.h>
+
 typedef uint32_t UINT32;
 typedef uint64_t UINT64;
 #endif
@@ -60,7 +61,8 @@ const size_t DEFAULT_MAX_TRANS_OPT_CACHE_SIZE = 10000;
 const size_t DEFAULT_MAX_TRANS_OPT_SIZE	= 5000;
 const size_t DEFAULT_MAX_PART_TRANS_OPT_SIZE = 10000;
 #ifdef PT_UG
-  const size_t DEFAULT_MAX_PHRASE_LENGTH = -1;
+// setting to std::numeric_limits<size_t>::max() makes the regression test for (deprecated) PhraseDictionaryDynamicSuffixArray fail. 
+ const size_t DEFAULT_MAX_PHRASE_LENGTH = 100000;
 #else
  const size_t DEFAULT_MAX_PHRASE_LENGTH = 20;
 #endif

From 4d4e194daca1e24df9d95b1a59afb844a7ee1e5a Mon Sep 17 00:00:00 2001
From: Ulrich Germann <ugermann@inf.ed.ac.uk>
Date: Thu, 17 Jul 2014 22:46:01 +0100
Subject: [PATCH 65/84] Fixed build failure when compiled without --with-mm.

---
 moses-cmd/Jamfile | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/moses-cmd/Jamfile b/moses-cmd/Jamfile
index d257cd26cf..0599de2e3b 100644
--- a/moses-cmd/Jamfile
+++ b/moses-cmd/Jamfile
@@ -10,4 +10,9 @@ $(TOP)//boost_program_options
 deps 
 ;
 
+
+if [ option.get "with-mm" : no : yes ] = yes {
 alias programs : moses lmbrgrid simulate-pe ;
+} else { 
+alias programs : moses lmbrgrid ;
+}
\ No newline at end of file

From dfb97f888650f80145f72a2768fd4cbf8bba296d Mon Sep 17 00:00:00 2001
From: Ulrich Germann <ugermann@inf.ed.ac.uk>
Date: Thu, 17 Jul 2014 22:47:57 +0100
Subject: [PATCH 66/84] Added missing EOL at end of file.

---
 moses-cmd/Jamfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/moses-cmd/Jamfile b/moses-cmd/Jamfile
index 0599de2e3b..4ccf0f9a46 100644
--- a/moses-cmd/Jamfile
+++ b/moses-cmd/Jamfile
@@ -15,4 +15,4 @@ if [ option.get "with-mm" : no : yes ] = yes {
 alias programs : moses lmbrgrid simulate-pe ;
 } else { 
 alias programs : moses lmbrgrid ;
-}
\ No newline at end of file
+}

From 1347b153ee329773793237d7552e227224b92856 Mon Sep 17 00:00:00 2001
From: Hieu Hoang <hieuhoang@gmail.com>
Date: Thu, 17 Jul 2014 23:13:06 +0100
Subject: [PATCH 67/84] compiles with c++11. Used by oxlm

---
 moses/TranslationModel/UG/mm/ug_im_tsa.h    | 8 ++++----
 moses/TranslationModel/UG/mm/ug_im_ttrack.h | 6 +++---
 moses/TranslationModel/UG/mm/ug_tsa_base.h  | 8 ++++----
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/moses/TranslationModel/UG/mm/ug_im_tsa.h b/moses/TranslationModel/UG/mm/ug_im_tsa.h
index 1de45d877c..6b4a83f726 100644
--- a/moses/TranslationModel/UG/mm/ug_im_tsa.h
+++ b/moses/TranslationModel/UG/mm/ug_im_tsa.h
@@ -52,12 +52,12 @@ namespace ugdiss
     
   public:
     imTSA();
-    imTSA(shared_ptr<Ttrack<TOKEN> const> c, 
+    imTSA(boost::shared_ptr<Ttrack<TOKEN> const> c, 
 	  bdBitset const* filt, 
 	  ostream* log = NULL);
 
     imTSA(imTSA<TOKEN> const& prior, 
-	  shared_ptr<imTtrack<TOKEN> const> const&   crp,
+	  boost::shared_ptr<imTtrack<TOKEN> const> const&   crp,
 	  vector<id_type> const& newsids, size_t const vsize);
 
     count_type 
@@ -140,7 +140,7 @@ namespace ugdiss
   // specified in filter
   template<typename TOKEN>
   imTSA<TOKEN>::
-  imTSA(shared_ptr<Ttrack<TOKEN> const> c, bdBitset const* filter, ostream* log)
+  imTSA(boost::shared_ptr<Ttrack<TOKEN> const> c, bdBitset const* filter, ostream* log)
   {
     assert(c);
     this->corpus = c;
@@ -359,7 +359,7 @@ namespace ugdiss
   template<typename TOKEN>
   imTSA<TOKEN>::
   imTSA(imTSA<TOKEN> const& prior, 
-  	shared_ptr<imTtrack<TOKEN> const> const&   crp,
+  	boost::shared_ptr<imTtrack<TOKEN> const> const&   crp,
   	vector<id_type> const& newsids, size_t const vsize)
   {
     typename ttrack::Position::LESS<Ttrack<TOKEN> > sorter(crp.get());
diff --git a/moses/TranslationModel/UG/mm/ug_im_ttrack.h b/moses/TranslationModel/UG/mm/ug_im_ttrack.h
index 0c6e4afbf6..fd460d93f0 100644
--- a/moses/TranslationModel/UG/mm/ug_im_ttrack.h
+++ b/moses/TranslationModel/UG/mm/ug_im_ttrack.h
@@ -190,13 +190,13 @@ namespace ugdiss
 
   /// add a sentence to the database
   template<typename TOKEN>
-  shared_ptr<imTtrack<TOKEN> > 
-  append(shared_ptr<imTtrack<TOKEN> > const& crp, vector<TOKEN> const & snt)
+  boost::shared_ptr<imTtrack<TOKEN> > 
+  append(boost::shared_ptr<imTtrack<TOKEN> > const& crp, vector<TOKEN> const & snt)
   {
 #if 1
     if (crp) crp->m_check_token_count();
 #endif
-    shared_ptr<imTtrack<TOKEN> > ret;
+    boost::shared_ptr<imTtrack<TOKEN> > ret;
     if (crp == NULL)
       {
   	ret.reset(new imTtrack<TOKEN>());
diff --git a/moses/TranslationModel/UG/mm/ug_tsa_base.h b/moses/TranslationModel/UG/mm/ug_tsa_base.h
index a6291ac3c3..dc5e270c28 100644
--- a/moses/TranslationModel/UG/mm/ug_tsa_base.h
+++ b/moses/TranslationModel/UG/mm/ug_tsa_base.h
@@ -53,7 +53,7 @@ namespace ugdiss
     /* an entry in the array, for iteration over all occurrences of a
      * particular sequence */
     // typedef boost::dynamic_bitset<uint64_t>           bitset; 
-    typedef shared_ptr<bitvector>         bitset_pointer;
+    typedef boost::shared_ptr<bitvector>         bitset_pointer;
     typedef TKN                                        Token;
     typedef BitSetCache<TSA<TKN> >                     BSC_t; 
     /* to allow caching of bit vectors that are expensive to create on
@@ -62,7 +62,7 @@ namespace ugdiss
     friend class TSA_tree_iterator<TKN>;
 
   protected:
-    shared_ptr<Ttrack<TKN> const> corpus; // pointer to the underlying corpus
+    boost::shared_ptr<Ttrack<TKN> const> corpus; // pointer to the underlying corpus
     char const*               startArray; // beginning ...
     char const*                 endArray; // ... and end ...
     // of memory block storing the actual TSA
@@ -139,7 +139,7 @@ namespace ugdiss
     getUpperBound(id_type id) const = 0;
 
   public:
-    shared_ptr<BSC_t> bsc;
+    boost::shared_ptr<BSC_t> bsc;
     
     char const* arrayStart() const { return startArray; }
     char const* arrayEnd()   const { return endArray;   }
@@ -298,7 +298,7 @@ namespace ugdiss
     bitset_pointer
     getBitSet(TKN const* startKey, size_t keyLen) const;
     
-    shared_ptr<bitvector>
+    boost::shared_ptr<bitvector>
     findTree(TKN const* treeStart, TKN const* treeEnd, 
              bitvector const* filter) const;
     

From 353eff77d30b18e1930d3e9855a8f08d196df8d7 Mon Sep 17 00:00:00 2001
From: Hieu Hoang <hieuhoang@gmail.com>
Date: Thu, 17 Jul 2014 23:33:39 +0100
Subject: [PATCH 68/84] Fixed build failure when compiled without --with-mm.

---
 moses-cmd/Jamfile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/moses-cmd/Jamfile b/moses-cmd/Jamfile
index 4ccf0f9a46..79af196e99 100644
--- a/moses-cmd/Jamfile
+++ b/moses-cmd/Jamfile
@@ -3,6 +3,8 @@ alias deps : IOWrapper.cpp mbr.cpp LatticeMBR.cpp TranslationAnalysis.cpp ..//z
 exe moses : Main.cpp deps ;
 exe lmbrgrid : LatticeMBRGrid.cpp deps ;
 
+if [ option.get "with-mm" : : "yes" ] {
+
 exe simulate-pe : 
 simulate-pe.cc 
 $(TOP)/moses/TranslationModel/UG/generic//generic 
@@ -10,8 +12,6 @@ $(TOP)//boost_program_options
 deps 
 ;
 
-
-if [ option.get "with-mm" : no : yes ] = yes {
 alias programs : moses lmbrgrid simulate-pe ;
 } else { 
 alias programs : moses lmbrgrid ;

From b0fbf80dc0968885b9a3607e95a709af97561664 Mon Sep 17 00:00:00 2001
From: Rico Sennrich <rico.sennrich@gmx.ch>
Date: Fri, 18 Jul 2014 14:39:54 +0100
Subject: [PATCH 69/84] remove unnecessary code, and floor NPLM scores

---
 moses/LM/NeuralLMWrapper.cpp | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/moses/LM/NeuralLMWrapper.cpp b/moses/LM/NeuralLMWrapper.cpp
index 9411bd2c45..ab7b5400bc 100644
--- a/moses/LM/NeuralLMWrapper.cpp
+++ b/moses/LM/NeuralLMWrapper.cpp
@@ -4,7 +4,6 @@
 #include <boost/functional/hash.hpp>
 #include "NeuralLMWrapper.h"
 #include "neuralLM.h"
-#include <model.h>
 
 using namespace std;
 
@@ -34,7 +33,6 @@ void NeuralLMWrapper::Load()
   m_sentenceEndWord[m_factorType] = m_sentenceEnd;
 
   m_neuralLM_shared = new nplm::neuralLM(m_filePath, true);
-  m_neuralLM_shared->set_log_base(10);
   //TODO: config option?
   m_neuralLM_shared->set_cache(1000000);
 
@@ -56,7 +54,7 @@ LMResult NeuralLMWrapper::GetValue(const vector<const Word*> &contextFactor, Sta
   for (size_t i=0, n=contextFactor.size(); i<n; i++) {
     const Word* word = contextFactor[i];
     const Factor* factor = word->GetFactor(m_factorType);
-    const std::string string= factor->GetString().as_string();
+    const std::string string = factor->GetString().as_string();
     int neuralLM_wordID = m_neuralLM->lookup_word(string);
     words[i] = neuralLM_wordID;
     boost::hash_combine(hashCode, neuralLM_wordID);
@@ -66,7 +64,7 @@ LMResult NeuralLMWrapper::GetValue(const vector<const Word*> &contextFactor, Sta
 
   // Create a new struct to hold the result
   LMResult ret;
-  ret.score = value;
+  ret.score = FloorScore(value);
   ret.unknown = false;
 
   (*finalState) = (State*) hashCode;

From ff1d5ca0d6a941f16c805295d4ee35bc80dbb8b4 Mon Sep 17 00:00:00 2001
From: Kenneth Heafield <github@kheafield.com>
Date: Fri, 18 Jul 2014 22:24:28 +0800
Subject: [PATCH 70/84] KenLM 8386117fdc026b8532adac85c5f921388754d569

---
 lm/Jamfile                     |  4 ++--
 lm/builder/Jamfile             |  4 ++++
 lm/builder/dump_counts_main.cc | 36 ++++++++++++++++++++++++++++++++++
 lm/builder/print.cc            |  5 ++---
 lm/ngram_query.hh              |  2 +-
 5 files changed, 45 insertions(+), 6 deletions(-)
 create mode 100644 lm/builder/dump_counts_main.cc

diff --git a/lm/Jamfile b/lm/Jamfile
index 6ca37c99e5..227b220142 100644
--- a/lm/Jamfile
+++ b/lm/Jamfile
@@ -17,7 +17,7 @@ wrappers = ;
 local with-nplm = [ option.get "with-nplm" ] ;
 if $(with-nplm) {
   lib neuralLM : : <search>$(with-nplm)/src ;
-  obj nplm.o : wrappers/nplm.cc : <include>.. <include>$(with-nplm)/src <include>$(with-nplm)/3rdparty/eigen <cxxflags>-fopenmp ;
+  obj nplm.o : wrappers/nplm.cc : <include>.. <include>$(with-nplm)/src <cxxflags>-fopenmp ;
   alias nplm : nplm.o neuralLM ..//boost_thread : : : <cxxflags>-fopenmp <linkflags>-fopenmp <define>WITH_NPLM <library>..//boost_thread ;
   wrappers += nplm ;
 }
@@ -37,4 +37,4 @@ for local p in [ glob *_main.cc ] {
   exes += $(name) ;
 }
 
-alias programs : $(exes) filter//filter : <threading>multi:<source>builder//lmplz ;
+alias programs : $(exes) filter//filter builder//dump_counts : <threading>multi:<source>builder//lmplz ;
diff --git a/lm/builder/Jamfile b/lm/builder/Jamfile
index b596e086ae..1e0e18b5fe 100644
--- a/lm/builder/Jamfile
+++ b/lm/builder/Jamfile
@@ -4,6 +4,10 @@ fakelib builder : [ glob *.cc : *test.cc *main.cc ]
 
 exe lmplz : lmplz_main.cc builder /top//boost_program_options ;
 
+exe dump_counts : dump_counts_main.cc builder ;
+
+alias programs : lmplz dump_counts ;
+
 import testing ;
 unit-test corpus_count_test : corpus_count_test.cc builder /top//boost_unit_test_framework ;
 unit-test adjust_counts_test : adjust_counts_test.cc builder /top//boost_unit_test_framework ;
diff --git a/lm/builder/dump_counts_main.cc b/lm/builder/dump_counts_main.cc
new file mode 100644
index 0000000000..fa00167925
--- /dev/null
+++ b/lm/builder/dump_counts_main.cc
@@ -0,0 +1,36 @@
+#include "lm/builder/print.hh"
+#include "lm/word_index.hh"
+#include "util/file.hh"
+#include "util/read_compressed.hh"
+
+#include <boost/lexical_cast.hpp>
+
+#include <iostream>
+#include <vector>
+
+int main(int argc, char *argv[]) {
+  if (argc != 4) {
+    std::cerr << "Usage: " << argv[0] << " counts vocabulary order\n"
+    "The counts file contains records with 4-byte vocabulary ids followed by 8-byte\n"
+    "counts.  Each record has order many vocabulary ids.\n"
+    "The vocabulary file contains the words delimited by NULL in order of id.\n"
+    "The vocabulary file may not be compressed because it is mmapped but the counts\n"
+    "file can be compressed.\n";
+    return 1;
+  }
+  util::ReadCompressed counts(util::OpenReadOrThrow(argv[1]));
+  util::scoped_fd vocab_file(util::OpenReadOrThrow(argv[2]));
+  lm::builder::VocabReconstitute vocab(vocab_file.get());
+  unsigned int order = boost::lexical_cast<unsigned int>(argv[3]);
+  std::vector<char> record(sizeof(uint32_t) * order + sizeof(uint64_t));
+  while (std::size_t got = counts.ReadOrEOF(&*record.begin(), record.size())) {
+    UTIL_THROW_IF(got != record.size(), util::Exception, "Read " << got << " bytes at the end of file, which is not a complete record of length " << record.size());
+    const lm::WordIndex *words = reinterpret_cast<const lm::WordIndex*>(&*record.begin());
+    for (const lm::WordIndex *i = words; i != words + order; ++i) {
+      UTIL_THROW_IF(*i >= vocab.Size(), util::Exception, "Vocab ID " << *i << " is larger than the vocab file's maximum of " << vocab.Size() << ".  Are you sure you have the right order and vocab file for these counts?");
+      std::cout << vocab.Lookup(*i) << ' ';
+    }
+    // TODO don't use std::cout because it is slow.  Add fast uint64_t printing support to FakeOFStream.
+    std::cout << *reinterpret_cast<const uint64_t*>(words + order) << '\n';
+  }
+}
diff --git a/lm/builder/print.cc b/lm/builder/print.cc
index c70e62ed66..75f15f0a6d 100644
--- a/lm/builder/print.cc
+++ b/lm/builder/print.cc
@@ -54,9 +54,8 @@ void PrintARPA::Run(const util::stream::ChainPositions &positions) {
       for (const WordIndex *i = stream->begin() + 1; i != stream->end(); ++i) {
         out << ' ' << vocab_.Lookup(*i);
       }
-      float backoff = stream->Value().complete.backoff;
-      if (backoff != 0.0)
-        out << '\t' << backoff;
+      if (order != positions.size())
+        out << '\t' << stream->Value().complete.backoff;
       out << '\n';
     
     }
diff --git a/lm/ngram_query.hh b/lm/ngram_query.hh
index 9e32d113ab..5f330c5cc1 100644
--- a/lm/ngram_query.hh
+++ b/lm/ngram_query.hh
@@ -36,7 +36,7 @@ struct FullPrint : public BasicPrint {
       "Perplexity including OOVs:\t" << ppl_including_oov << "\n"
       "Perplexity excluding OOVs:\t" << ppl_excluding_oov << "\n"
       "OOVs:\t" << corpus_oov << "\n"
-      "Tokenss:\t" << corpus_tokens << '\n'
+      "Tokens:\t" << corpus_tokens << '\n'
       ;
   }
 };

From b10760f4283140a5bb285b3e68ca3eee9800de99 Mon Sep 17 00:00:00 2001
From: Hieu Hoang <hieuhoang@gmail.com>
Date: Fri, 18 Jul 2014 20:36:53 +0100
Subject: [PATCH 71/84] delete PhraseTableImplementation. Old enum

---
 moses/Parameter.cpp                           | 19 ++++++++++---------
 .../RuleTable/PhraseDictionaryOnDisk.h        |  4 ----
 moses/TypeDef.h                               | 18 ------------------
 3 files changed, 10 insertions(+), 31 deletions(-)

diff --git a/moses/Parameter.cpp b/moses/Parameter.cpp
index 10ac566278..fb41d9a3c3 100644
--- a/moses/Parameter.cpp
+++ b/moses/Parameter.cpp
@@ -36,6 +36,7 @@ using namespace std;
 
 namespace Moses
 {
+
 /** define allowed parameters */
 Parameter::Parameter()
 {
@@ -513,29 +514,29 @@ void Parameter::ConvertWeightArgsPhraseModel(const string &oldWeightName)
       }
       UTIL_THROW_IF2(token.size() < 5, "Phrase table must have at least 5 scores");
 
-      PhraseTableImplementation implementation = (PhraseTableImplementation) Scan<int>(token[0]);
+      int implementation = Scan<int>(token[0]);
 
       string ptType;
       switch (implementation) {
-      case Memory:
+      case 0: // Memory
         ptType = "PhraseDictionaryMemory";
         break;
-      case Binary:
+      case 1: // Binary
         ptType = "PhraseDictionaryBinary";
         break;
-      case OnDisk:
+      case 2: // OnDisk
         ptType = "PhraseDictionaryOnDisk";
         break;
-      case SCFG:
+      case 6: // SCFG
         ptType = "PhraseDictionaryMemory";
         break;
-      case Compact:
+      case 12: // Compact
         ptType = "PhraseDictionaryCompact";
         break;
-      case SuffixArray:
+      case 8: // SuffixArray
         ptType = "PhraseDictionarySuffixArray";
         break;
-      case DSuffixArray:
+      case 14: // DSuffixArray
         ptType = "PhraseDictionaryDynSuffixArray";
         break;
       default:
@@ -591,7 +592,7 @@ void Parameter::ConvertWeightArgsPhraseModel(const string &oldWeightName)
       ptLine << "num-features=" << numScoreComponent << " ";
       ptLine << "table-limit=" << maxTargetPhrase[currDict] << " ";
 
-      if (implementation == SuffixArray || implementation == DSuffixArray) {
+      if (implementation == 8 || implementation == 14) {
         ptLine << "target-path=" << token[5] << " ";
         ptLine << "alignment-path=" << token[6] << " ";
       }
diff --git a/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.h b/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.h
index 19548411ce..4deb800f8f 100644
--- a/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.h
+++ b/moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.h
@@ -69,10 +69,6 @@ class PhraseDictionaryOnDisk : public PhraseDictionary
   ~PhraseDictionaryOnDisk();
   void Load();
 
-  PhraseTableImplementation GetPhraseTableImplementation() const {
-    return OnDisk;
-  }
-
   // PhraseDictionary impl
   virtual ChartRuleLookupManager *CreateRuleLookupManager(
     const ChartParser &parser,
diff --git a/moses/TypeDef.h b/moses/TypeDef.h
index 2f45f5e9ce..a5c434d4b3 100644
--- a/moses/TypeDef.h
+++ b/moses/TypeDef.h
@@ -112,24 +112,6 @@ enum DistortionOrientationOptions {
 };
 }
 
-enum PhraseTableImplementation {
-  Memory				= 0
-  ,Binary				= 1
-  ,OnDisk				= 2
-  //,GlueRule		= 3
-  //,Joshua			= 4
-  //,MemorySourceLabel	= 5
-  ,SCFG					= 6
-  //,BerkeleyDb	= 7
-  ,SuffixArray	= 8
-  ,Hiero        = 9
-  ,ALSuffixArray = 10
-  ,FuzzyMatch    = 11
-  ,Compact      = 12
-  ,Interpolated = 13
-  ,DSuffixArray = 14
-  ,MemMappedSA = 15
-};
 
 enum InputTypeEnum {
   SentenceInput						= 0

From 07d325876cbac72e538c0f03b5933c705c8efc09 Mon Sep 17 00:00:00 2001
From: Hieu Hoang <hieuhoang@gmail.com>
Date: Fri, 18 Jul 2014 21:30:49 +0100
Subject: [PATCH 72/84] dump_counts doesn't compile

---
 lm/Jamfile         | 4 +++-
 lm/builder/Jamfile | 5 +++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/lm/Jamfile b/lm/Jamfile
index 227b220142..0f0297ffad 100644
--- a/lm/Jamfile
+++ b/lm/Jamfile
@@ -37,4 +37,6 @@ for local p in [ glob *_main.cc ] {
   exes += $(name) ;
 }
 
-alias programs : $(exes) filter//filter builder//dump_counts : <threading>multi:<source>builder//lmplz ;
+#alias programs : $(exes) filter//filter builder//dump_counts : <threading>multi:<source>builder//lmplz ;
+alias programs : $(exes) filter//filter : <threading>multi:<source>builder//lmplz ;
+
diff --git a/lm/builder/Jamfile b/lm/builder/Jamfile
index 1e0e18b5fe..2e472e2223 100644
--- a/lm/builder/Jamfile
+++ b/lm/builder/Jamfile
@@ -4,9 +4,10 @@ fakelib builder : [ glob *.cc : *test.cc *main.cc ]
 
 exe lmplz : lmplz_main.cc builder /top//boost_program_options ;
 
-exe dump_counts : dump_counts_main.cc builder ;
+#exe dump_counts : dump_counts_main.cc builder ;
 
-alias programs : lmplz dump_counts ;
+#alias programs : lmplz dump_counts ;
+alias programs : lmplz ;
 
 import testing ;
 unit-test corpus_count_test : corpus_count_test.cc builder /top//boost_unit_test_framework ;

From c83c5a3ee6f3ef7480e7a782d2023af9e99c1711 Mon Sep 17 00:00:00 2001
From: Kenneth Heafield <github@kheafield.com>
Date: Sat, 19 Jul 2014 06:54:01 +0800
Subject: [PATCH 73/84] D'ph forgot to copy util

---
 lm/Jamfile              |  4 +---
 lm/builder/Jamfile      |  5 ++---
 util/exception.hh       |  8 ++++----
 util/read_compressed.cc | 11 +++++++++++
 util/read_compressed.hh |  4 ++++
 5 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/lm/Jamfile b/lm/Jamfile
index 0f0297ffad..227b220142 100644
--- a/lm/Jamfile
+++ b/lm/Jamfile
@@ -37,6 +37,4 @@ for local p in [ glob *_main.cc ] {
   exes += $(name) ;
 }
 
-#alias programs : $(exes) filter//filter builder//dump_counts : <threading>multi:<source>builder//lmplz ;
-alias programs : $(exes) filter//filter : <threading>multi:<source>builder//lmplz ;
-
+alias programs : $(exes) filter//filter builder//dump_counts : <threading>multi:<source>builder//lmplz ;
diff --git a/lm/builder/Jamfile b/lm/builder/Jamfile
index 2e472e2223..1e0e18b5fe 100644
--- a/lm/builder/Jamfile
+++ b/lm/builder/Jamfile
@@ -4,10 +4,9 @@ fakelib builder : [ glob *.cc : *test.cc *main.cc ]
 
 exe lmplz : lmplz_main.cc builder /top//boost_program_options ;
 
-#exe dump_counts : dump_counts_main.cc builder ;
+exe dump_counts : dump_counts_main.cc builder ;
 
-#alias programs : lmplz dump_counts ;
-alias programs : lmplz ;
+alias programs : lmplz dump_counts ;
 
 import testing ;
 unit-test corpus_count_test : corpus_count_test.cc builder /top//boost_unit_test_framework ;
diff --git a/util/exception.hh b/util/exception.hh
index 2fb00667fe..4e50a6f3a0 100644
--- a/util/exception.hh
+++ b/util/exception.hh
@@ -1,5 +1,5 @@
-#ifndef UTIL_EXCEPTION__
-#define UTIL_EXCEPTION__
+#ifndef UTIL_EXCEPTION_H
+#define UTIL_EXCEPTION_H
 
 #include <exception>
 #include <limits>
@@ -84,7 +84,7 @@ template <class Except, class Data> typename Except::template ExceptionTag<Excep
   UTIL_THROW_BACKEND(NULL, Exception, , Modify);
 
 #define UTIL_THROW2(Modify) \
-		UTIL_THROW_BACKEND(NULL, util::Exception, , Modify);
+  UTIL_THROW_BACKEND(NULL, util::Exception, , Modify);
 
 #if __GNUC__ >= 3
 #define UTIL_UNLIKELY(x) __builtin_expect (!!(x), 0)
@@ -146,4 +146,4 @@ inline std::size_t CheckOverflow(uint64_t value) {
 
 } // namespace util
 
-#endif // UTIL_EXCEPTION__
+#endif // UTIL_EXCEPTION_H
diff --git a/util/read_compressed.cc b/util/read_compressed.cc
index 71ef0e251c..e1f4cd7e3b 100644
--- a/util/read_compressed.cc
+++ b/util/read_compressed.cc
@@ -435,4 +435,15 @@ std::size_t ReadCompressed::Read(void *to, std::size_t amount) {
   return internal_->Read(to, amount, *this);
 }
 
+std::size_t ReadCompressed::ReadOrEOF(void *const to_in, std::size_t amount) {
+  uint8_t *to = reinterpret_cast<uint8_t*>(to_in);
+  while (amount) {
+    std::size_t got = Read(to, amount);
+    if (!got) break;
+    to += got;
+    amount -= got;
+  }
+  return to - reinterpret_cast<uint8_t*>(to_in);
+}
+
 } // namespace util
diff --git a/util/read_compressed.hh b/util/read_compressed.hh
index 763e6bbd3a..767ee94b2c 100644
--- a/util/read_compressed.hh
+++ b/util/read_compressed.hh
@@ -62,6 +62,10 @@ class ReadCompressed {
 
     std::size_t Read(void *to, std::size_t amount);
 
+    // Repeatedly call Read until the buffer is full or EOF is hit.
+    // Return number of bytes read, which is less than amount only at EOF.
+    std::size_t ReadOrEOF(void *const to, std::size_t amount);
+
     uint64_t RawAmount() const { return raw_amount_; }
 
   private:

From ab06edda5b5ce64d6f624aca234146e6d222d407 Mon Sep 17 00:00:00 2001
From: Ulrich Germann <ugermann@inf.ed.ac.uk>
Date: Sat, 19 Jul 2014 19:05:58 +0100
Subject: [PATCH 74/84] Moved merge-sorted from contrib/m4m/util to misc.

---
 misc/Jamfile                               | 10 +++++++++-
 {contrib/m4m/util => misc}/merge-sorted.cc |  0
 2 files changed, 9 insertions(+), 1 deletion(-)
 rename {contrib/m4m/util => misc}/merge-sorted.cc (100%)

diff --git a/misc/Jamfile b/misc/Jamfile
index d466e306ca..e90ec8d0d4 100644
--- a/misc/Jamfile
+++ b/misc/Jamfile
@@ -36,4 +36,12 @@ else {
     alias programsProbing ;
 }
 
-alias programs : 1-1-Extraction TMining generateSequences processPhraseTable processLexicalTable queryPhraseTable queryLexicalTable programsMin programsProbing ;
+exe merge-sorted : 
+merge-sorted.cc 
+../moses//moses
+../moses/TranslationModel/UG/generic//generic 
+$(TOP)//boost_iostreams 
+$(TOP)//boost_program_options 
+; 
+
+alias programs : 1-1-Extraction TMining generateSequences processPhraseTable processLexicalTable queryPhraseTable queryLexicalTable programsMin programsProbing merge-sorted ;
diff --git a/contrib/m4m/util/merge-sorted.cc b/misc/merge-sorted.cc
similarity index 100%
rename from contrib/m4m/util/merge-sorted.cc
rename to misc/merge-sorted.cc

From efee2695c31e1086af783c1b092fc842fb7bb1a4 Mon Sep 17 00:00:00 2001
From: Barry Haddow <barry.haddow@gmail.com>
Date: Mon, 21 Jul 2014 11:04:43 +0100
Subject: [PATCH 75/84] Merge 08811deb17337356cd8dae9c59c0160590679a35 from
 joshua

---
 mert/BleuScorer.cpp        |   4 +-
 mert/BleuScorer.h          |   2 +-
 mert/FeatureStats.cpp      |  30 ++-
 mert/FeatureStats.h        |   7 +
 mert/ForestRescore.cpp     | 422 +++++++++++++++++++++++++++++++++++++
 mert/ForestRescore.h       | 120 +++++++++++
 mert/ForestRescoreTest.cpp | 246 +++++++++++++++++++++
 mert/HopeFearDecoder.cpp   | 328 ++++++++++++++++++++++++++++
 mert/HopeFearDecoder.h     | 151 +++++++++++++
 mert/Hypergraph.cpp        | 286 +++++++++++++++++++++++++
 mert/Hypergraph.h          | 251 ++++++++++++++++++++++
 mert/HypergraphTest.cpp    | 151 +++++++++++++
 mert/Jamfile               |   7 +-
 mert/MiraFeatureVector.cpp |  40 +++-
 mert/MiraFeatureVector.h   |   8 +
 mert/MiraWeightVector.cpp  |  17 ++
 mert/MiraWeightVector.h    |   8 +-
 mert/kbmira.cpp            | 187 ++++++++--------
 18 files changed, 2154 insertions(+), 111 deletions(-)
 create mode 100644 mert/ForestRescore.cpp
 create mode 100644 mert/ForestRescore.h
 create mode 100644 mert/ForestRescoreTest.cpp
 create mode 100644 mert/HopeFearDecoder.cpp
 create mode 100644 mert/HopeFearDecoder.h
 create mode 100644 mert/Hypergraph.cpp
 create mode 100644 mert/Hypergraph.h
 create mode 100644 mert/HypergraphTest.cpp

diff --git a/mert/BleuScorer.cpp b/mert/BleuScorer.cpp
index 467855d9b0..f6ada2aa8c 100644
--- a/mert/BleuScorer.cpp
+++ b/mert/BleuScorer.cpp
@@ -266,12 +266,12 @@ float smoothedSentenceBleu
 float sentenceLevelBackgroundBleu(const std::vector<float>& sent, const std::vector<float>& bg)
 {
   // Sum sent and background
-  std::vector<float> stats;
   UTIL_THROW_IF(sent.size()!=bg.size(), util::Exception, "Error");
   UTIL_THROW_IF(sent.size() != kBleuNgramOrder * 2 + 1, util::Exception, "Error");
+  std::vector<float> stats(sent.size());
 
   for(size_t i=0; i<sent.size(); i++)
-    stats.push_back(sent[i]+bg[i]);
+    stats[i] = sent[i]+bg[i];
 
   // Calculate BLEU
   float logbleu = 0.0;
diff --git a/mert/BleuScorer.h b/mert/BleuScorer.h
index 8be5675741..affa37fbf0 100644
--- a/mert/BleuScorer.h
+++ b/mert/BleuScorer.h
@@ -13,7 +13,7 @@
 namespace MosesTuning
 {
 
-const int kBleuNgramOrder = 4;
+const size_t kBleuNgramOrder = 4;
 
 class NgramCounts;
 class Reference;
diff --git a/mert/FeatureStats.cpp b/mert/FeatureStats.cpp
index 5a12be70a8..a0c6a6ebc4 100644
--- a/mert/FeatureStats.cpp
+++ b/mert/FeatureStats.cpp
@@ -14,6 +14,8 @@
 
 #include <boost/functional/hash.hpp>
 
+#include "util/murmur_hash.hh"
+
 #include "Util.h"
 
 using namespace std;
@@ -59,6 +61,11 @@ void SparseVector::set(const string& name, FeatureStatsType value)
   m_fvector[id] = value;
 }
 
+void SparseVector::set(size_t id, FeatureStatsType value) {
+  assert(m_id_to_name.size() > id);
+  m_fvector[id] = value;
+}
+
 void SparseVector::write(ostream& out, const string& sep) const
 {
   for (fvector_t::const_iterator i = m_fvector.begin(); i != m_fvector.end(); ++i) {
@@ -91,6 +98,16 @@ void SparseVector::load(const string& file)
   }
 }
 
+SparseVector& SparseVector::operator+=(const SparseVector& rhs)
+{
+
+  for (fvector_t::const_iterator i = rhs.m_fvector.begin();
+       i != rhs.m_fvector.end(); ++i) {
+    m_fvector[i->first] =  get(i->first) + (i->second);
+  }
+  return *this;
+}
+
 SparseVector& SparseVector::operator-=(const SparseVector& rhs)
 {
 
@@ -162,12 +179,18 @@ bool operator==(SparseVector const& item1, SparseVector const& item2)
   return item1.m_fvector==item2.m_fvector;
 }
 
+
 std::size_t hash_value(SparseVector const& item)
 {
-  boost::hash<SparseVector::fvector_t> hasher;
-  return hasher(item.m_fvector);
+  size_t seed = 0;
+  for (SparseVector::fvector_t::const_iterator i = item.m_fvector.begin(); i != item.m_fvector.end(); ++i) {
+    seed = util::MurmurHashNative(&(i->first), sizeof(i->first), seed);
+    seed = util::MurmurHashNative(&(i->second), sizeof(i->second), seed);
+  }
+  return seed;
 }
 
+
 FeatureStats::FeatureStats()
   : m_available_size(kAvailableSize), m_entries(0),
     m_array(new FeatureStatsType[m_available_size]) {}
@@ -181,8 +204,7 @@ FeatureStats::FeatureStats(const size_t size)
 
 FeatureStats::~FeatureStats()
 {
-  delete [] m_array;
-  m_array = NULL;
+   delete [] m_array;
 }
 
 void FeatureStats::Copy(const FeatureStats &stats)
diff --git a/mert/FeatureStats.h b/mert/FeatureStats.h
index a882e73581..2ccbac50c1 100644
--- a/mert/FeatureStats.h
+++ b/mert/FeatureStats.h
@@ -14,6 +14,11 @@
 #include <map>
 #include <string>
 #include <vector>
+
+#include <boost/unordered_map.hpp>
+
+#include <util/string_piece.hh>
+
 #include "Types.h"
 
 namespace MosesTuning
@@ -31,6 +36,7 @@ class SparseVector
   FeatureStatsType get(const std::string& name) const;
   FeatureStatsType get(std::size_t id) const;
   void set(const std::string& name, FeatureStatsType value);
+  void set(size_t id, FeatureStatsType value);
   void clear();
   void load(const std::string& file);
   std::size_t size() const {
@@ -40,6 +46,7 @@ class SparseVector
   void write(std::ostream& out, const std::string& sep = " ") const;
 
   SparseVector& operator-=(const SparseVector& rhs);
+  SparseVector& operator+=(const SparseVector& rhs);
   FeatureStatsType inner_product(const SparseVector& rhs) const;
 
   // Added by cherryc
diff --git a/mert/ForestRescore.cpp b/mert/ForestRescore.cpp
new file mode 100644
index 0000000000..c88b58e4ca
--- /dev/null
+++ b/mert/ForestRescore.cpp
@@ -0,0 +1,422 @@
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2014- University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#include <cmath>
+#include <limits>
+#include <list>
+
+#include <boost/unordered_set.hpp>
+
+#include "util/file_piece.hh"
+#include "util/tokenize_piece.hh"
+
+#include "BleuScorer.h"
+#include "ForestRescore.h"
+
+using namespace std;
+
+namespace MosesTuning {
+
+std::ostream& operator<<(std::ostream& out, const WordVec& wordVec) {
+  out << "[";
+  for (size_t i = 0; i < wordVec.size(); ++i) {
+    out << wordVec[i]->first;
+    if (i+1< wordVec.size()) out << " ";
+  }
+  out << "]";
+  return out;
+}
+
+
+void ReferenceSet::Load(const vector<string>& files, Vocab& vocab) {
+  for (size_t i = 0; i < files.size(); ++i) {
+    util::FilePiece fh(files[i].c_str());
+    size_t sentenceId = 0;
+    while(true) {
+      StringPiece line;
+      try {
+        line = fh.ReadLine();
+      } catch (util::EndOfFileException &e) {
+        break;
+      }
+     AddLine(sentenceId, line, vocab);
+     ++sentenceId;
+    }
+  }
+
+}
+
+void ReferenceSet::AddLine(size_t sentenceId, const StringPiece& line, Vocab& vocab) {
+  //cerr << line << endl;
+  NgramCounter ngramCounts;
+  list<WordVec> openNgrams;
+  size_t length = 0;
+  //tokenize & count
+  for (util::TokenIter<util::SingleCharacter, true> j(line, util::SingleCharacter(' ')); j; ++j) {
+    const Vocab::Entry* nextTok = &(vocab.FindOrAdd(*j));
+    ++length;
+    openNgrams.push_front(WordVec());
+    for (list<WordVec>::iterator k = openNgrams.begin(); k != openNgrams.end();  ++k) {
+      k->push_back(nextTok);
+      ++ngramCounts[*k]; 
+    }
+    if (openNgrams.size() >=  kBleuNgramOrder) openNgrams.pop_back();
+  }
+
+  //merge into overall ngram map
+  for (NgramCounter::const_iterator ni = ngramCounts.begin();
+    ni != ngramCounts.end(); ++ni) {
+    size_t count = ni->second;
+    //cerr << *ni << " " << count <<  endl;
+    if (ngramCounts_.size() <= sentenceId) ngramCounts_.resize(sentenceId+1);
+    NgramMap::iterator totalsIter = ngramCounts_[sentenceId].find(ni->first);
+    if (totalsIter == ngramCounts_[sentenceId].end()) {
+      ngramCounts_[sentenceId][ni->first] = pair<size_t,size_t>(count,count);
+    } else {
+      ngramCounts_[sentenceId][ni->first].first = max(count, ngramCounts_[sentenceId][ni->first].first); //clip
+      ngramCounts_[sentenceId][ni->first].second += count; //no clip
+    }
+  }
+  //length
+  if (lengths_.size() <= sentenceId) lengths_.resize(sentenceId+1);
+  //TODO - length strategy - currently keeps the MIN (shortest) reference length
+  if (!lengths_[sentenceId]) {
+    lengths_[sentenceId] = length;
+  } else {
+    lengths_[sentenceId] = min(length,lengths_[sentenceId]);
+  }
+  //cerr << endl;
+
+}
+  
+size_t ReferenceSet::NgramMatches(size_t sentenceId, const WordVec& ngram, bool clip) const  {
+  const NgramMap& ngramCounts = ngramCounts_.at(sentenceId);
+  NgramMap::const_iterator ngi = ngramCounts.find(ngram);
+  if (ngi == ngramCounts.end()) return 0;
+  return clip ? ngi->second.first : ngi->second.second;
+}
+
+VertexState::VertexState(): bleuStats(kBleuNgramOrder), targetLength(0) {}
+
+void HgBleuScorer::UpdateMatches(const NgramCounter& counts, vector<FeatureStatsType>& bleuStats ) const {
+  for (NgramCounter::const_iterator ngi = counts.begin(); ngi != counts.end(); ++ngi) {
+    //cerr << "Checking: " << *ngi << " matches " << references_.NgramMatches(sentenceId_,*ngi,false) <<  endl;
+    size_t order = ngi->first.size();
+    size_t count = ngi->second;
+    bleuStats[(order-1)*2 + 1] += count;
+    bleuStats[(order-1) * 2] += min(count, references_.NgramMatches(sentenceId_,ngi->first,false));
+  }
+}
+
+size_t HgBleuScorer::GetTargetLength(const Edge& edge) const {
+  size_t targetLength = 0;
+  for (size_t i = 0; i < edge.Words().size(); ++i) {
+    const Vocab::Entry* word = edge.Words()[i];
+    if (word) ++targetLength;
+  }
+  for (size_t i = 0; i < edge.Children().size(); ++i) {
+    const VertexState& state = vertexStates_[edge.Children()[i]];
+    targetLength += state.targetLength;
+  }
+  return targetLength;
+}
+
+FeatureStatsType HgBleuScorer::Score(const Edge& edge, const Vertex& head, vector<FeatureStatsType>& bleuStats) {
+  NgramCounter ngramCounts;
+  size_t childId = 0;
+  size_t wordId = 0;
+  size_t contextId = 0; //position within left or right context
+  const VertexState* vertexState = NULL;
+  bool inLeftContext = false;
+  bool inRightContext = false;
+  list<WordVec> openNgrams;
+  const Vocab::Entry* currentWord = NULL;
+  while (wordId < edge.Words().size()) { 
+    currentWord = edge.Words()[wordId];
+    if (currentWord != NULL) {
+      ++wordId;
+    } else {
+      if (!inLeftContext && !inRightContext) {
+        //entering a vertex
+        assert(!vertexState);
+        vertexState = &(vertexStates_[edge.Children()[childId]]);
+        ++childId;
+        if (vertexState->leftContext.size()) {
+          inLeftContext = true;
+          contextId = 0;
+          currentWord = vertexState->leftContext[contextId];
+        } else {
+          //empty context
+          vertexState = NULL;
+          ++wordId;
+          continue;
+        }
+      } else {
+        //already in a vertex
+        ++contextId;
+        if (inLeftContext && contextId < vertexState->leftContext.size()) {
+          //still in left context
+          currentWord = vertexState->leftContext[contextId];
+        } else if (inLeftContext) {
+          //at end of left context
+          if (vertexState->leftContext.size() == kBleuNgramOrder-1) {
+            //full size context, jump to right state
+            openNgrams.clear();
+            inLeftContext = false;
+            inRightContext = true;
+            contextId = 0;
+            currentWord = vertexState->rightContext[contextId];
+          } else {
+            //short context, just ignore right context
+            inLeftContext = false;
+            vertexState = NULL;
+            ++wordId;
+            continue;
+          }
+        } else {
+          //in right context
+          if (contextId < vertexState->rightContext.size()) {
+            currentWord = vertexState->rightContext[contextId];
+          } else {
+            //leaving vertex
+            inRightContext = false;
+            vertexState = NULL;
+            ++wordId;
+            continue;
+          }
+        }
+      }
+    }
+    assert(currentWord);
+    if (graph_.IsBoundary(currentWord)) continue;
+    openNgrams.push_front(WordVec());
+    openNgrams.front().reserve(kBleuNgramOrder);
+    for (list<WordVec>::iterator k = openNgrams.begin(); k != openNgrams.end();  ++k) {
+      k->push_back(currentWord);
+      //Only insert ngrams that cross boundaries
+      if (!vertexState || (inLeftContext && k->size() > contextId+1)) ++ngramCounts[*k];
+    }
+    if (openNgrams.size() >=  kBleuNgramOrder) openNgrams.pop_back();
+  }
+  
+  //Collect matches
+  //This edge
+  //cerr << "edge ngrams" << endl;
+  UpdateMatches(ngramCounts, bleuStats);
+
+  //Child vertexes
+  for (size_t i = 0; i < edge.Children().size(); ++i) {
+    //cerr << "vertex ngrams " << edge.Children()[i] << endl;
+    for (size_t j = 0; j < bleuStats.size(); ++j) {
+      bleuStats[j] += vertexStates_[edge.Children()[i]].bleuStats[j];
+    }
+  }
+  
+
+  FeatureStatsType sourceLength = head.SourceCovered();
+  size_t referenceLength = references_.Length(sentenceId_);
+  FeatureStatsType effectiveReferenceLength = 
+    sourceLength / totalSourceLength_ * referenceLength;
+
+  bleuStats[bleuStats.size()-1] = effectiveReferenceLength;
+  //backgroundBleu_[backgroundBleu_.size()-1] = 
+  //  backgroundRefLength_ * sourceLength / totalSourceLength_;
+  FeatureStatsType bleu = sentenceLevelBackgroundBleu(bleuStats, backgroundBleu_);
+
+  return bleu;
+}
+
+//Record, for vertexId, the left/right boundary words, target length and stats of its winning edge.
+void HgBleuScorer::UpdateState(const Edge& winnerEdge, size_t vertexId, const vector<FeatureStatsType>& bleuStats) {
+  //TODO: Maybe more efficient to absorb into the Score() method
+  VertexState& vertexState = vertexStates_[vertexId];
+  
+  //leftContext: up to kBleuNgramOrder-1 words read from the left; a NULL word is a child slot
+  int wi = 0; //index into winnerEdge.Words()
+  const VertexState* childState = NULL;
+  int contexti = 0; //index within child context
+  int childi = 0; //next entry of winnerEdge.Children() to visit
+  while (vertexState.leftContext.size() < (kBleuNgramOrder-1)) {
+    if ((size_t)wi >= winnerEdge.Words().size()) break; //ran out of edge words
+    const Vocab::Entry* word = winnerEdge.Words()[wi];
+    if (word != NULL) {
+      vertexState.leftContext.push_back(word);
+      ++wi;
+    } else {
+      if (childState == NULL) {
+        //start of child state
+        childState = &(vertexStates_[winnerEdge.Children()[childi++]]);
+        contexti = 0;
+      } 
+      if ((size_t)contexti < childState->leftContext.size()) {
+        vertexState.leftContext.push_back(childState->leftContext[contexti++]); 
+      } else {
+        //end of child context
+        childState = NULL;
+        ++wi;
+      }
+    }
+  }
+
+  //rightContext: the same scan from the right; words are collected in reverse order
+  wi = winnerEdge.Words().size() - 1;
+  childState = NULL;
+  childi = winnerEdge.Children().size() - 1;
+  while (vertexState.rightContext.size() < (kBleuNgramOrder-1)) {
+    if (wi < 0) break; //ran out of edge words
+    const Vocab::Entry* word = winnerEdge.Words()[wi];
+    if (word != NULL) {
+      vertexState.rightContext.push_back(word);
+      --wi;
+    } else {
+      if (childState == NULL) {
+        //start (ie rhs) of child state
+        childState = &(vertexStates_[winnerEdge.Children()[childi--]]);
+        contexti = childState->rightContext.size()-1;
+      }
+      if (contexti >= 0) {
+        vertexState.rightContext.push_back(childState->rightContext[contexti--]);
+      } else {
+        //end (ie lhs) of child context
+        childState = NULL;
+        --wi;
+      }
+    }
+  }
+  reverse(vertexState.rightContext.begin(), vertexState.rightContext.end()); //back to left-to-right order
+
+  //length + counts
+  vertexState.targetLength = GetTargetLength(winnerEdge);
+  vertexState.bleuStats = bleuStats;
+}
+
+
+typedef pair<const Edge*,FeatureStatsType> BackPointer; //(winning incoming edge, score stored for the vertex); see Viterbi()
+
+
+/**
+ * Expand the back pointers below vertexId, appending the derivation's words to
+ * bestHypo->text and adding its edge features to bestHypo->featureVector.
+ **/
+static void GetBestHypothesis(size_t vertexId, const Graph& graph, const vector<BackPointer>& bps,
+     HgHypothesis* bestHypo) {
+  const Edge* const edge = bps[vertexId].first;
+  if (edge == NULL) return; //no back pointer recorded for this vertex: nothing to add
+  bestHypo->featureVector += *(edge->Features().get());
+  size_t nextChild = 0; //children are consumed in the order of the NULL placeholders
+  for (size_t pos = 0; pos < edge->Words().size(); ++pos) {
+    const Vocab::Entry* word = edge->Words()[pos];
+    if (word == NULL) {
+      //NULL marks a child slot: expand it into a scratch hypothesis, then splice it in
+      HgHypothesis subHypo;
+      GetBestHypothesis(edge->Children()[nextChild++], graph, bps, &subHypo);
+      bestHypo->text.insert(bestHypo->text.end(), subHypo.text.begin(), subHypo.text.end());
+      bestHypo->featureVector += subHypo.featureVector;
+    } else {
+      bestHypo->text.push_back(word);
+    }
+  }
+}
+
+/**
+ * Find the best derivation of the graph's last vertex. An edge's score is its model score
+ * under weights plus its children's stored scores, plus (when bleuWeight is non-zero)
+ * bleuWeight times the value of HgBleuScorer::Score. The winner's words and features are
+ * added to bestHypo, and bestHypo->bleuStats is recomputed from the final text.
+ **/
+void Viterbi(const Graph& graph, const SparseVector& weights, float bleuWeight, const ReferenceSet& references , size_t sentenceId, const std::vector<FeatureStatsType>& backgroundBleu,  HgHypothesis* bestHypo) 
+{
+  BackPointer init(NULL,kMinScore); //a score of exactly kMinScore marks a vertex not processed yet
+  vector<BackPointer> backPointers(graph.VertexSize(),init);
+  HgBleuScorer bleuScorer(references, graph, sentenceId, backgroundBleu);
+  vector<FeatureStatsType> winnerStats(kBleuNgramOrder*2+1);
+  for (size_t vi = 0; vi < graph.VertexSize(); ++vi) {
+    FeatureStatsType winnerScore = kMinScore;
+    const Vertex& vertex = graph.GetVertex(vi);
+    const vector<const Edge*>& incoming = vertex.GetIncoming();
+    if (!incoming.size()) {
+      //UTIL_THROW(HypergraphException, "Vertex " << vi << " has no incoming edges");
+      //If no incoming edges, vertex is a dead end
+      backPointers[vi].first = NULL;
+      backPointers[vi].second = kMinScore/2;  //differs from kMinScore, so the sort check below still passes
+    } else {
+      for (size_t ei = 0; ei < incoming.size(); ++ei) {
+        FeatureStatsType incomingScore = incoming[ei]->GetScore(weights);
+        for (size_t i = 0; i < incoming[ei]->Children().size(); ++i) {
+          size_t childId = incoming[ei]->Children()[i];
+          UTIL_THROW_IF(backPointers[childId].second == kMinScore,
+            HypergraphException, "Graph was not topologically sorted. curr=" << vi << " prev=" << childId);
+          incomingScore += backPointers[childId].second;
+        }
+        vector<FeatureStatsType> bleuStats(kBleuNgramOrder*2+1);
+        FeatureStatsType totalScore = incomingScore;
+        if (bleuWeight) { 
+          FeatureStatsType bleuScore = bleuScorer.Score(*(incoming[ei]), vertex, bleuStats);
+          UTIL_THROW_IF(isnan(bleuScore), util::Exception, "Bleu score undefined, smoothing problem?");
+          totalScore += bleuWeight * bleuScore;
+        }
+        if (totalScore >= winnerScore) { //>= lets a later edge win a tie
+          //We only store the feature score (not the bleu score) with the vertex,
+          //since the bleu score is always cumulative, ie from counts for the whole span.
+          winnerScore = totalScore;
+          backPointers[vi].first = incoming[ei];
+          backPointers[vi].second = incomingScore;
+          winnerStats = bleuStats;
+        }
+      }
+      //update with winner
+      //if (bleuWeight) {
+      //TODO: Not sure if we need this when computing max-model solution
+      //NOTE(review): first stays NULL if no edge reached kMinScore (or scores were NaN); confirm unreachable
+      bleuScorer.UpdateState(*(backPointers[vi].first), vi, winnerStats);
+
+    }
+  }
+
+  //expand back pointers
+  GetBestHypothesis(graph.VertexSize()-1, graph, backPointers, bestHypo);
+
+  //bleu stats and fv
+
+  //Need the actual (clipped) stats
+  //TODO: This repeats code in bleu scorer - factor out
+  bestHypo->bleuStats.resize(kBleuNgramOrder*2+1);
+  NgramCounter counts;
+  list<WordVec> openNgrams; //ngrams still being extended, newest (shortest) first
+  for (size_t i = 0; i < bestHypo->text.size(); ++i) {
+    const Vocab::Entry* entry = bestHypo->text[i];
+    if (graph.IsBoundary(entry)) continue; //boundary words are not counted
+    openNgrams.push_front(WordVec());
+    for (list<WordVec>::iterator k = openNgrams.begin(); k != openNgrams.end();  ++k) {
+      k->push_back(entry);
+      ++counts[*k];
+    }
+    if (openNgrams.size() >=  kBleuNgramOrder) openNgrams.pop_back(); //oldest ngram has reached the maximum order
+  }
+  for (NgramCounter::const_iterator ngi = counts.begin(); ngi != counts.end(); ++ngi) {
+    size_t order = ngi->first.size();
+    size_t count = ngi->second;
+    bestHypo->bleuStats[(order-1)*2 + 1] += count; //odd slots: ngrams of this order in the hypothesis
+    bestHypo->bleuStats[(order-1) * 2] += min(count, references.NgramMatches(sentenceId,ngi->first,true)); //even slots: matches, capped at count
+  }
+  bestHypo->bleuStats[kBleuNgramOrder*2] = references.Length(sentenceId); //last slot: reference length
+}
+
+
+};
diff --git a/mert/ForestRescore.h b/mert/ForestRescore.h
new file mode 100644
index 0000000000..900275b747
--- /dev/null
+++ b/mert/ForestRescore.h
@@ -0,0 +1,120 @@
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2014- University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+#ifndef MERT_FOREST_RESCORE_H
+#define MERT_FOREST_RESCORE_H
+
+#include <valarray>
+#include <vector>
+
+#include <boost/unordered_set.hpp>
+
+#include "BleuScorer.h"
+#include "Hypergraph.h"
+
+namespace MosesTuning {
+
+std::ostream& operator<<(std::ostream& out, const WordVec& wordVec); //writes wordVec to out; defined outside this header
+
+struct NgramHash : public std::unary_function<const WordVec&, std::size_t> { //hash functor for the ngram-keyed maps below
+  std::size_t operator()(const WordVec& ngram) const {
+    return util::MurmurHashNative(&(ngram[0]), ngram.size() * sizeof(WordVec::value_type)); //hashes the elements' raw bytes; takes &ngram[0], so assumes a non-empty ngram
+  }
+};
+
+struct NgramEquals : public std::binary_function<const WordVec&, const WordVec&, bool> {
+  //Key equality for the ngram-keyed maps: true when both ngrams have the same length
+  //and identical elements.
+  bool operator()(const WordVec& first, const WordVec& second) const {
+    return first == second; //container ==: same test as the byte compare, also defined for empty ngrams
+  }
+};
+
+typedef boost::unordered_map<WordVec, size_t, NgramHash, NgramEquals> NgramCounter; //ngram -> number of occurrences
+
+
+//Reference-side data for BLEU: a length per sentence id plus per-ngram count maps.
+class ReferenceSet {
+
+public:
+  //Only declared in this header; presumably registers one reference line for sentenceId - TODO confirm
+  void AddLine(size_t sentenceId, const StringPiece& line, Vocab& vocab);
+
+  void Load(const std::vector<std::string>& files, Vocab& vocab); //presumably reads the reference files - TODO confirm
+
+  size_t NgramMatches(size_t sentenceId, const WordVec&, bool clip) const; //presumably the clipped or unclipped count, chosen by clip - TODO confirm
+
+  size_t Length(size_t sentenceId) const {return lengths_[sentenceId];} //stored length; sentenceId is not range checked
+
+private:
+  //ngrams to (clipped,unclipped) counts
+  typedef boost::unordered_map<WordVec, std::pair<std::size_t,std::size_t>, NgramHash,NgramEquals> NgramMap;
+  std::vector<NgramMap> ngramCounts_;
+  std::vector<size_t> lengths_;
+
+};
+
+struct VertexState { //per-vertex state written by HgBleuScorer::UpdateState for the vertex's winning edge
+  VertexState();
+
+  std::vector<FeatureStatsType> bleuStats; //copy of the stats passed to UpdateState
+  WordVec leftContext; //leftmost words of the winning derivation, fewer than kBleuNgramOrder
+  WordVec rightContext; //rightmost words of the winning derivation, in left-to-right order
+  size_t targetLength; //GetTargetLength() of the winning edge
+};
+
+/**
+  * Used to score a rule (ie edge) when we are applying it.
+**/
+class HgBleuScorer {
+  public:
+    HgBleuScorer(const ReferenceSet& references, const Graph& graph, size_t sentenceId, const std::vector<FeatureStatsType>& backgroundBleu):
+    references_(references), sentenceId_(sentenceId), graph_(graph), backgroundBleu_(backgroundBleu),
+      backgroundRefLength_(backgroundBleu[kBleuNgramOrder*2]) { //last background entry; backgroundBleu needs kBleuNgramOrder*2+1 entries
+      vertexStates_.resize(graph.VertexSize());
+      totalSourceLength_ = graph.GetVertex(graph.VertexSize()-1).SourceCovered(); //taken from the last vertex; the graph must not be empty
+    }
+
+    //Fills bleuStats for applying edge at head and returns its sentence-level background BLEU.
+    FeatureStatsType Score(const Edge& edge, const Vertex& head, std::vector<FeatureStatsType>& bleuStats) ;
+
+    void UpdateState(const Edge& winnerEdge, size_t vertexId, const std::vector<FeatureStatsType>& bleuStats); //stores the winner's contexts, length and stats for vertexId
+
+  private:
+    const ReferenceSet& references_; //held by reference; must outlive the scorer
+    std::vector<VertexState> vertexStates_; //one entry per graph vertex, indexed by vertex id
+    size_t sentenceId_;
+    size_t totalSourceLength_;
+    const Graph& graph_; //held by reference; must outlive the scorer
+    std::vector<FeatureStatsType> backgroundBleu_; //own copy of the background stats
+    FeatureStatsType backgroundRefLength_;
+
+    void UpdateMatches(const NgramCounter& counter, std::vector<FeatureStatsType>& bleuStats) const;
+    size_t GetTargetLength(const Edge& edge) const;
+};
+
+struct HgHypothesis { //result of Viterbi(): one derivation of the hypergraph
+  SparseVector featureVector; //sum of the feature vectors of the edges used
+  WordVec text; //target words in order, as found on the edges
+  std::vector<FeatureStatsType> bleuStats; //kBleuNgramOrder*2+1 entries: (matches, total) per order, then reference length
+};
+
+void Viterbi(const Graph& graph, const SparseVector& weights, float bleuWeight, const ReferenceSet& references, size_t sentenceId, const std::vector<FeatureStatsType>& backgroundBleu, HgHypothesis* bestHypo); //decodes graph into *bestHypo; bleuWeight scales the BLEU term (0 = model score only)
+
+};
+
+#endif
diff --git a/mert/ForestRescoreTest.cpp b/mert/ForestRescoreTest.cpp
new file mode 100644
index 0000000000..86975d3a50
--- /dev/null
+++ b/mert/ForestRescoreTest.cpp
@@ -0,0 +1,246 @@
+#include <iostream>
+
+#include "ForestRescore.h"
+
+#define BOOST_TEST_MODULE MertForestRescore
+#include <boost/test/unit_test.hpp>
+
+
+
+using namespace std;
+using namespace MosesTuning;
+
+BOOST_AUTO_TEST_CASE(viterbi_simple_lattice)
+{
+  Vocab vocab;
+  WordVec words;
+  string wordStrings[] =
+    {"<s>", "</s>", "a", "b", "c", "d", "e", "f", "g"};
+  for (size_t i = 0; i < 9; ++i) {
+    words.push_back(&(vocab.FindOrAdd((wordStrings[i]))));
+  }
+
+  const string f1 = "foo";
+  const string f2 = "bar";
+  Graph graph(vocab);
+  graph.SetCounts(5,5);
+  //Single path: every edge takes the previous vertex as its child (NULL word) and appends words.
+  Edge* e0 = graph.NewEdge();
+  e0->AddWord(words[0]);
+  e0->AddFeature(f1, 2.0);
+
+  Vertex* v0 = graph.NewVertex();
+  v0->AddEdge(e0);
+
+  Edge* e1 = graph.NewEdge();
+  e1->AddWord(NULL);
+  e1->AddChild(0);
+  e1->AddWord(words[2]);
+  e1->AddWord(words[3]);
+  e1->AddFeature(f1, 1.0);
+  e1->AddFeature(f2, 3.0);
+
+  Vertex* v1 = graph.NewVertex();
+  v1->AddEdge(e1);
+
+  Edge* e2 = graph.NewEdge();
+  e2->AddWord(NULL);
+  e2->AddChild(1);
+  e2->AddWord(words[4]);
+  e2->AddWord(words[5]);
+  e2->AddFeature(f2, 2.5);
+
+  Vertex* v2 = graph.NewVertex();
+  v2->AddEdge(e2);
+
+  Edge* e3 = graph.NewEdge();
+  e3->AddWord(NULL);
+  e3->AddChild(2);
+  e3->AddWord(words[6]);
+  e3->AddWord(words[7]);
+  e3->AddWord(words[8]);
+  e3->AddFeature(f1, -1);
+
+  Vertex* v3 = graph.NewVertex();
+  v3->AddEdge(e3);
+
+  Edge* e4 = graph.NewEdge();
+  e4->AddWord(NULL);
+  e4->AddChild(3);
+  e4->AddWord(words[1]);
+  e4->AddFeature(f2, 0.5);
+
+  Vertex* v4 = graph.NewVertex();
+  v4->AddEdge(e4);
+
+  ReferenceSet references;
+  references.AddLine(0, "a b c k e f o", vocab);
+  HgHypothesis modelHypo;
+  vector<FeatureStatsType> bg(kBleuNgramOrder*2+1);
+  SparseVector weights;
+  weights.set(f1,2);
+  weights.set(f2,1);
+  Viterbi(graph, weights, 0, references, 0, bg, &modelHypo); //bleuWeight 0: model score only
+  BOOST_CHECK_CLOSE(2.0,modelHypo.featureVector.get(f1), 0.0001); //foo: 2.0 + 1.0 - 1
+  BOOST_CHECK_CLOSE(6.0,modelHypo.featureVector.get(f2), 0.0001); //bar: 3.0 + 2.5 + 0.5
+  //The text is the in-order concatenation of the words on the path.
+  BOOST_CHECK_EQUAL(words[0]->first, modelHypo.text[0]->first);
+  BOOST_CHECK_EQUAL(words[2]->first, modelHypo.text[1]->first);
+  BOOST_CHECK_EQUAL(words[3]->first, modelHypo.text[2]->first);
+  BOOST_CHECK_EQUAL(words[4]->first, modelHypo.text[3]->first);
+  BOOST_CHECK_EQUAL(words[5]->first, modelHypo.text[4]->first);
+  BOOST_CHECK_EQUAL(words[6]->first, modelHypo.text[5]->first);
+  BOOST_CHECK_EQUAL(words[7]->first, modelHypo.text[6]->first);
+  BOOST_CHECK_EQUAL(words[8]->first, modelHypo.text[7]->first);
+  BOOST_CHECK_EQUAL(words[1]->first, modelHypo.text[8]->first);
+}
+
+
+
+BOOST_AUTO_TEST_CASE(viterbi_3branch_lattice)
+{
+  Vocab vocab;
+  WordVec words;
+  string wordStrings[] =
+    {"<s>", "</s>", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"};
+  for (size_t i = 0; i < 13; ++i) {
+    words.push_back(&(vocab.FindOrAdd((wordStrings[i]))));
+  }
+
+  const string f1 = "foo";
+  const string f2 = "bar";
+  Graph graph(vocab);
+  graph.SetCounts(5,8);
+  //Lattice with alternative edges into v1 and v3; the path table below lists every path.
+  Edge* e0 = graph.NewEdge();
+  e0->AddWord(words[0]);
+
+  Vertex* v0 = graph.NewVertex();
+  v0->AddEdge(e0);
+
+  Edge* e1 = graph.NewEdge();
+  e1->AddWord(NULL);
+  e1->AddChild(0);
+  e1->AddWord(words[2]);
+  e1->AddWord(words[3]);
+  e1->AddFeature(f1,1);
+  e1->AddFeature(f2,1);
+  Edge* e5 = graph.NewEdge(); //alternative into v1: "h i" instead of "a b"
+  e5->AddWord(NULL);
+  e5->AddChild(0);
+  e5->AddWord(words[9]);
+  e5->AddWord(words[10]);
+  e5->AddFeature(f1,2);
+  e5->AddFeature(f2,-2);
+
+  Vertex* v1 = graph.NewVertex();
+  v1->AddEdge(e1);
+  v1->AddEdge(e5);
+  v1->SetSourceCovered(1);
+
+  Edge* e2 = graph.NewEdge();
+  e2->AddWord(NULL);
+  e2->AddChild(1);
+  e2->AddWord(words[4]);
+  e2->AddWord(words[5]);
+  e2->AddFeature(f2,3);
+
+  Vertex* v2 = graph.NewVertex();
+  v2->AddEdge(e2);
+  v2->SetSourceCovered(3);
+
+  Edge* e3 = graph.NewEdge();
+  e3->AddWord(NULL);
+  e3->AddChild(2);
+  e3->AddWord(words[6]);
+  e3->AddWord(words[7]);
+  e3->AddWord(words[8]);
+  e3->AddFeature(f1,1);
+  Edge* e6 = graph.NewEdge(); //alternative into v3: "h k" after v2
+  e6->AddWord(NULL);
+  e6->AddChild(2);
+  e6->AddWord(words[9]);
+  e6->AddWord(words[12]);
+  e6->AddFeature(f2,1);
+  Edge* e7 = graph.NewEdge(); //alternative into v3 that skips v2: "j k" directly after v1
+  e7->AddWord(NULL);
+  e7->AddChild(1);
+  e7->AddWord(words[11]);
+  e7->AddWord(words[12]);
+  e7->AddFeature(f1,2);
+  e7->AddFeature(f2,3);
+
+  Vertex* v3 = graph.NewVertex();
+  v3->AddEdge(e3);
+  v3->AddEdge(e6);
+  v3->AddEdge(e7);
+  v3->SetSourceCovered(5);
+
+  Edge* e4 = graph.NewEdge();
+  e4->AddWord(NULL);
+  e4->AddChild(3);
+  e4->AddWord(words[1]);
+
+  Vertex* v4 = graph.NewVertex();
+  v4->AddEdge(e4);
+  v4->SetSourceCovered(6);
+
+  /*Paths     || foo || bar || s(2,1) = 2*foo + 1*bar, the model score under the weights below
+   ab cd hk   || 1   ||  5  || 7
+   hi cd hk   || 2   ||  2  || 6
+   ab jk      || 3   ||  4  || 10
+   hi jk      || 4   ||  1  || 9
+   ab cd efg  || 2   ||  4  || 8
+   hi cd efg  || 3   ||  1  || 7
+  */
+
+  ReferenceSet references;
+  references.AddLine(0, "a b c d h k", vocab);
+  HgHypothesis modelHypo;
+  vector<FeatureStatsType> bg(kBleuNgramOrder*2+1, 0.1);
+  SparseVector weights;
+  weights.set(f1,2);
+  weights.set(f2,1);
+  Viterbi(graph, weights, 0, references, 0, bg, &modelHypo); //bleuWeight 0: model score only
+  BOOST_CHECK_CLOSE(3.0,modelHypo.featureVector.get(f1), 0.0001);
+  BOOST_CHECK_CLOSE(4.0,modelHypo.featureVector.get(f2), 0.0001);
+
+  BOOST_CHECK_EQUAL(6, modelHypo.text.size());
+
+  //expect ab jk (the highest model score in the table, 10)
+  BOOST_CHECK_EQUAL(words[0]->first, modelHypo.text[0]->first);
+  BOOST_CHECK_EQUAL(words[2]->first, modelHypo.text[1]->first);
+  BOOST_CHECK_EQUAL(words[3]->first, modelHypo.text[2]->first);
+  BOOST_CHECK_EQUAL(words[11]->first, modelHypo.text[3]->first);
+  BOOST_CHECK_EQUAL(words[12]->first, modelHypo.text[4]->first);
+  BOOST_CHECK_EQUAL(words[1]->first, modelHypo.text[5]->first);
+
+  //Hope decode: bleuWeight 1 adds the BLEU term to the model score.
+  HgHypothesis hopeHypo;
+  Viterbi(graph, weights, 1, references, 0, bg, &hopeHypo);
+  //expect abcdhk (the path whose words equal the reference), plus <s> and </s>
+  BOOST_CHECK_EQUAL(8, hopeHypo.text.size());
+
+  BOOST_CHECK_EQUAL(words[0]->first, hopeHypo.text[0]->first);
+  BOOST_CHECK_EQUAL(words[2]->first, hopeHypo.text[1]->first);
+  BOOST_CHECK_EQUAL(words[3]->first, hopeHypo.text[2]->first);
+  BOOST_CHECK_EQUAL(words[4]->first, hopeHypo.text[3]->first);
+  BOOST_CHECK_EQUAL(words[5]->first, hopeHypo.text[4]->first);
+  BOOST_CHECK_EQUAL(words[9]->first, hopeHypo.text[5]->first);
+  BOOST_CHECK_EQUAL(words[12]->first, hopeHypo.text[6]->first);
+  BOOST_CHECK_EQUAL(words[1]->first, hopeHypo.text[7]->first);
+
+  BOOST_CHECK_EQUAL(kBleuNgramOrder*2+1, hopeHypo.bleuStats.size());
+  BOOST_CHECK_EQUAL(6, hopeHypo.bleuStats[0]); //(matches, total) pairs for orders 1..4 over the 6 non-boundary words
+  BOOST_CHECK_EQUAL(6, hopeHypo.bleuStats[1]);
+  BOOST_CHECK_EQUAL(5, hopeHypo.bleuStats[2]);
+  BOOST_CHECK_EQUAL(5, hopeHypo.bleuStats[3]);
+  BOOST_CHECK_EQUAL(4, hopeHypo.bleuStats[4]);
+  BOOST_CHECK_EQUAL(4, hopeHypo.bleuStats[5]);
+  BOOST_CHECK_EQUAL(3, hopeHypo.bleuStats[6]);
+  BOOST_CHECK_EQUAL(3, hopeHypo.bleuStats[7]);
+  BOOST_CHECK_EQUAL(6, hopeHypo.bleuStats[8]); //reference length
+}
+
+
+
diff --git a/mert/HopeFearDecoder.cpp b/mert/HopeFearDecoder.cpp
new file mode 100644
index 0000000000..5f5c599660
--- /dev/null
+++ b/mert/HopeFearDecoder.cpp
@@ -0,0 +1,328 @@
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2014- University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#include <cmath>
+#include <iterator>
+
+#include <boost/filesystem.hpp>
+#include <boost/lexical_cast.hpp>
+
+#include "util/exception.hh"
+#include "util/file_piece.hh"
+
+#include "BleuScorer.h"
+#include "HopeFearDecoder.h"
+
+using namespace std;
+namespace fs = boost::filesystem;
+
+namespace MosesTuning {
+
+static const ValType BLEU_RATIO = 5; //factor applied to the hope BLEU before it is compared with the hope model score (safe-hope check)
+
+ValType HopeFearDecoder::Evaluate(const AvgWeightVector& wv) {
+  //Accumulate the BLEU stats of each sentence's max-model hypothesis,
+  //then return the unsmoothed BLEU of the totals.
+  vector<ValType> totals(kBleuNgramOrder*2+1,0);
+  for(reset(); !finished(); next()) {
+    vector<ValType> sentStats;
+    MaxModel(wv,&sentStats);
+    for(size_t k=0; k<sentStats.size(); ++k) totals[k]+=sentStats[k];
+  }
+  return unsmoothedBleu(totals);
+}
+
+NbestHopeFearDecoder::NbestHopeFearDecoder(
+      const vector<string>& featureFiles,
+      const vector<string>&  scoreFiles,
+      bool streaming,
+      bool  no_shuffle,
+      bool safe_hope
+      ) : safe_hope_(safe_hope) {
+  //Choose the n-best enumerator; no_shuffle is only passed to the random-access one.
+  if (!streaming)
+    train_.reset(new RandomAccessHypPackEnumerator(featureFiles, scoreFiles, no_shuffle));
+  else
+    train_.reset(new StreamingHypPackEnumerator(featureFiles, scoreFiles));
+}
+
+
+void NbestHopeFearDecoder::next() { //forwards to the hypothesis-pack enumerator chosen in the constructor
+  train_->next();
+}
+
+bool NbestHopeFearDecoder::finished() { //forwards to the hypothesis-pack enumerator chosen in the constructor
+  return train_->finished();
+}
+
+void NbestHopeFearDecoder::reset() { //forwards to the hypothesis-pack enumerator chosen in the constructor
+  train_->reset();
+}
+
+void NbestHopeFearDecoder::HopeFear(
+              const std::vector<ValType>& backgroundBleu,
+              const MiraWeightVector& wv,
+              HopeFearData* hopeFear
+              ) {
+  // Select the hope (model+BLEU), fear (model-BLEU) and model (max score) entries of the
+  // current n-best list and copy their features and BLEU stats into hopeFear.
+  ValType modelWeight = 1.0; // scale applied to the model score in the hope objective
+  size_t hopeIdx=0, fearIdx=0, modelIdx=0;
+  ValType bestHope=0, bestFear=0, bestModel=0;
+  for(size_t pass=0; pass<2; ++pass) {
+    ValType chosenBleu, chosenModel; // BLEU and model score of the current hope entry
+    for(size_t h=0; h< train_->cur_size(); ++h) {
+      const MiraFeatureVector& feats=train_->featuresAt(h);
+      const ValType model = wv.score(feats);
+      const ValType bleu = sentenceLevelBackgroundBleu(train_->scoresAt(h),backgroundBleu);
+      const ValType hopeObj = modelWeight*model + bleu;
+      const ValType fearObj = model - bleu;
+      const bool first = (h==0); // the first entry always seeds all three maxima
+      if(first || hopeObj > bestHope) {
+        bestHope = hopeObj;
+        hopeIdx = h;
+        chosenBleu = bleu;
+        chosenModel = model;
+      }
+      if(first || fearObj > bestFear) {
+        bestFear = fearObj;
+        fearIdx = h;
+      }
+      if(first || model > bestModel) {
+        bestModel = model;
+        modelIdx = h;
+      }
+    }
+    // Safe hope: if the model score still outweighs BLEU after BLEU is boosted by
+    // BLEU_RATIO, search once more with the model score scaled down accordingly.
+    chosenBleu *= BLEU_RATIO;
+    const bool rescale = safe_hope_ && pass==0 && abs(chosenModel)>1e-8
+                         && abs(chosenBleu)/abs(chosenModel)<modelWeight;
+    if(!rescale) break;
+    modelWeight = abs(chosenBleu) / abs(chosenModel);
+  }
+  hopeFear->modelFeatures = train_->featuresAt(modelIdx);
+  hopeFear->hopeFeatures = train_->featuresAt(hopeIdx);
+  hopeFear->fearFeatures = train_->featuresAt(fearIdx);
+
+  hopeFear->modelStats = train_->scoresAt(modelIdx);
+  hopeFear->hopeStats = train_->scoresAt(hopeIdx);
+  hopeFear->hopeBleu = sentenceLevelBackgroundBleu(hopeFear->hopeStats, backgroundBleu);
+  // The fear stats are only needed for the BLEU value, so they are not stored.
+  hopeFear->fearBleu = sentenceLevelBackgroundBleu(train_->scoresAt(fearIdx), backgroundBleu);
+
+  hopeFear->hopeFearEqual = (hopeIdx == fearIdx);
+}
+
+void NbestHopeFearDecoder::MaxModel(const AvgWeightVector& wv, std::vector<ValType>* stats) {
+  // Copy into *stats the BLEU stats of the highest model-scoring entry (first one on ties).
+  size_t max_index=0;
+  ValType max_score=0;
+  for(size_t i=0; i<train_->cur_size(); i++) {
+    const MiraFeatureVector& vec=train_->featuresAt(i); // bound by reference, as in HopeFear(): no per-entry copy
+    ValType score = wv.score(vec);
+    if(i==0 || score > max_score) {
+      max_index = i;
+      max_score = score;
+    }
+  }
+  *stats = train_->scoresAt(max_index);
+}
+
+
+
+HypergraphHopeFearDecoder::HypergraphHopeFearDecoder
+                          (
+                            const string& hypergraphDir,
+                            const vector<string>& referenceFiles,
+                            size_t num_dense,
+                            bool streaming,
+                            bool no_shuffle,
+                            bool safe_hope,
+                            size_t hg_pruning,
+                            const MiraWeightVector& wv
+                          ) :
+                          num_dense_(num_dense) {
+  //Load the references, then read and prune every hypergraph in hypergraphDir (no_shuffle and safe_hope are unused here).
+  UTIL_THROW_IF(streaming, util::Exception, "Streaming not currently supported for hypergraphs");
+  UTIL_THROW_IF(!fs::exists(hypergraphDir), HypergraphException, "Directory '" << hypergraphDir << "' does not exist");
+  UTIL_THROW_IF(!referenceFiles.size(), util::Exception, "No reference files supplied");
+  references_.Load(referenceFiles, vocab_);
+
+  //weights handed to Graph::Prune below
+  SparseVector weights;
+  wv.ToSparse(&weights);
+
+  static const string kWeights = "weights";
+  fs::directory_iterator endOfDir;
+  size_t numRead = 0;
+  cerr << "Reading hypergraphs" << endl;
+  for (fs::directory_iterator entry(hypergraphDir); entry != endOfDir; ++entry) {
+    const fs::path& filePath = entry->path();
+    if (filePath.filename() == kWeights) continue; //the "weights" entry is skipped, not parsed as a graph
+    Graph graph(vocab_);
+    //the file name without its extension is parsed as the sentence id
+    const size_t id = boost::lexical_cast<size_t>(filePath.stem().string());
+    util::FilePiece file(filePath.string().c_str());
+    ReadGraph(file,graph);
+
+    //edge budget passed to Prune: hg_pruning times the reference length
+    const size_t edgeCount = hg_pruning * references_.Length(id);
+    boost::shared_ptr<Graph> prunedGraph(new Graph(vocab_));
+    graph.Prune(prunedGraph.get(), weights, edgeCount);
+    graphs_[id] = prunedGraph;
+    //progress: a dot every 10 files, a running count every 400
+    ++numRead;
+    if (numRead % 10 == 0) cerr << ".";
+    if (numRead % 400 ==  0) cerr << " [count=" << numRead << "]\n";
+  }
+  cerr << endl << "Done" << endl;
+}
+
+void HypergraphHopeFearDecoder::reset() { //restart the iteration at the first stored graph
+  graphIter_ = graphs_.begin();
+}
+
+void HypergraphHopeFearDecoder::next() { //advance to the next stored graph; no check against graphs_.end()
+  ++graphIter_;
+}
+
+bool HypergraphHopeFearDecoder::finished() { //true once the iterator has passed the last stored graph
+  return graphIter_ == graphs_.end();
+}
+
+void HypergraphHopeFearDecoder::HopeFear(
+            const vector<ValType>& backgroundBleu,
+            const MiraWeightVector& wv,
+            HopeFearData* hopeFear
+            ) {
+  //Decode the current hypergraph three times and fill in hopeFear:
+  //  hope  - Viterbi with bleuWeight 1  (model score + BLEU)
+  //  fear  - Viterbi with bleuWeight -1 (model score - BLEU)
+  //  model - Viterbi with bleuWeight 0  (model score only)
+  //backgroundBleu is passed to Viterbi and reused for hopeBleu/fearBleu below;
+  //wv supplies the weights for the model score.
+
+  size_t sentenceId = graphIter_->first;
+  SparseVector weights;
+  wv.ToSparse(&weights);
+  const Graph& graph = *(graphIter_->second);
+
+  HgHypothesis hopeHypo, fearHypo, modelHypo;
+
+  //hope decode
+  Viterbi(graph, weights, 1, references_, sentenceId, backgroundBleu, &hopeHypo);
+
+  //fear decode
+  Viterbi(graph, weights, -1, references_, sentenceId, backgroundBleu, &fearHypo);
+
+  //Model decode
+  Viterbi(graph, weights, 0, references_, sentenceId, backgroundBleu, &modelHypo);
+
+  //TODO: safe hope. The n-best decoder repeats the search once, rescaling the
+  //contribution of model score to 'hope' in antagonistic cases
+  //where model score is having far more influence than BLEU:
+  //  hope_bleu *= BLEU_RATIO; // We only care about cases where model has MUCH more influence than BLEU
+  //  if(safe_hope_ && safe_loop==0 && abs(hope_model)>1e-8 && abs(hope_bleu)/abs(hope_model)<hope_scale)
+  //    hope_scale = abs(hope_bleu) / abs(hope_model);
+  //  else break;
+  //Don't currently get model and bleu so there is a single pass for now.
+
+  //modelFeatures, hopeFeatures and fearFeatures
+  hopeFear->modelFeatures = MiraFeatureVector(modelHypo.featureVector, num_dense_);
+  hopeFear->hopeFeatures = MiraFeatureVector(hopeHypo.featureVector, num_dense_);
+  hopeFear->fearFeatures = MiraFeatureVector(fearHypo.featureVector, num_dense_);
+
+  //Need to know which are to be mapped to dense features!
+
+  //Copy the bleu stats. assign() replaces anything a previous call left in hopeFear,
+  //so the stats keep their kBleuNgramOrder*2+1 entries when the struct is reused.
+  //(The iterator-pair form of assign needs no C++11.)
+  hopeFear->modelStats.assign(modelHypo.bleuStats.begin(), modelHypo.bleuStats.end());
+  hopeFear->hopeStats.assign(hopeHypo.bleuStats.begin(), hopeHypo.bleuStats.end());
+  //The fear stats are only needed locally, for fearBleu and the equality check.
+  const vector<ValType> fearStats(fearHypo.bleuStats.begin(), fearHypo.bleuStats.end());
+
+  /*
+  cerr << "hope" << endl;;
+  for (size_t i = 0; i < hopeHypo.text.size(); ++i) {
+    cerr << hopeHypo.text[i]->first << " ";
+  }
+  cerr << endl;
+  for (size_t i = 0; i < fearStats.size(); ++i) {
+    cerr << hopeHypo.bleuStats[i] << " ";
+  }
+  cerr << endl;
+  cerr << "fear";
+  for (size_t i = 0; i < fearHypo.text.size(); ++i) {
+    cerr << fearHypo.text[i]->first << " ";
+  }
+  cerr << endl;
+  for (size_t i = 0; i < fearStats.size(); ++i) {
+    cerr  << fearHypo.bleuStats[i] << " ";
+  }
+  cerr << endl;
+  cerr << "model";
+  for (size_t i = 0; i < modelHypo.text.size(); ++i) {
+    cerr << modelHypo.text[i]->first << " ";
+  }
+  cerr << endl;
+  for (size_t i = 0; i < fearStats.size(); ++i) {
+    cerr << modelHypo.bleuStats[i] << " ";
+  }
+  cerr << endl;
+  */
+
+  hopeFear->hopeBleu = sentenceLevelBackgroundBleu(hopeFear->hopeStats, backgroundBleu);
+  hopeFear->fearBleu = sentenceLevelBackgroundBleu(fearStats, backgroundBleu);
+
+  //If fv and bleu stats are equal, then assume equal
+  //(Viterbi sizes every bleuStats vector to kBleuNgramOrder*2+1, so indices line up.)
+  bool sameStats = true;
+  for (size_t i = 0; sameStats && i < fearStats.size(); ++i) {
+    sameStats = (fearStats[i] == hopeFear->hopeStats[i]);
+  }
+
+  //The feature vectors are compared only when the stats already agree.
+  hopeFear->hopeFearEqual = sameStats && (hopeFear->fearFeatures == hopeFear->hopeFeatures);
+}
+
+void HypergraphHopeFearDecoder::MaxModel(const AvgWeightVector& wv, vector<ValType>* stats) {
+  //Model-only decode (bleuWeight 0) of the current hypergraph.
+  //*stats receives the BLEU stats of the resulting hypothesis.
+  //Must not be called once finished() is true.
+  assert(!finished());
+  const size_t sentenceId = graphIter_->first;
+  const Graph& graph = *(graphIter_->second);
+
+  SparseVector weights;
+  wv.ToSparse(&weights);
+
+  //Zero-filled background: Viterbi only calls the BLEU scorer for a non-zero bleuWeight.
+  const vector<ValType> zeroBackground(kBleuNgramOrder*2+1);
+  HgHypothesis best;
+  Viterbi(graph, weights, 0, references_, sentenceId, zeroBackground, &best);
+
+  //Overwrite *stats with a copy of the hypothesis' stats.
+  stats->assign(best.bleuStats.begin(), best.bleuStats.end());
+}
+
+
+
+};
diff --git a/mert/HopeFearDecoder.h b/mert/HopeFearDecoder.h
new file mode 100644
index 0000000000..e8323fc76a
--- /dev/null
+++ b/mert/HopeFearDecoder.h
@@ -0,0 +1,151 @@
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2014- University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+#ifndef MERT_HOPEFEARDECODER_H
+#define MERT_HOPEFEARDECODER_H
+
+#include <vector>
+
+#include <boost/scoped_ptr.hpp>
+#include <boost/shared_ptr.hpp>
+
+#include "ForestRescore.h"
+#include "Hypergraph.h"
+#include "HypPackEnumerator.h"
+#include "MiraFeatureVector.h"
+#include "MiraWeightVector.h"
+
+//
+// Used by batch mira to get the hope, fear and model hypothesis. This wraps
+// the n-best list and lattice/hypergraph implementations
+//
+
+namespace MosesTuning {
+
+/** To be filled in by the decoder */
+struct HopeFearData {
+  MiraFeatureVector modelFeatures;
+  MiraFeatureVector hopeFeatures;
+  MiraFeatureVector fearFeatures;
+  
+  std::vector<float> modelStats;
+  std::vector<float> hopeStats;
+
+  ValType hopeBleu;
+  ValType fearBleu;
+
+  bool hopeFearEqual;
+};
+
+//Abstract base class: an iterator (reset/next/finished) plus decoding operations
+class HopeFearDecoder {
+public:
+  virtual ~HopeFearDecoder() {} //permits safe deletion through a base pointer
+
+  //iterator methods
+  virtual void reset() = 0;
+  virtual void next() = 0;
+  virtual bool finished() = 0;
+
+  /**
+    * Calculate hope, fear and model hypotheses
+    **/
+  virtual void HopeFear(
+              const std::vector<ValType>& backgroundBleu,
+              const MiraWeightVector& wv,
+              HopeFearData* hopeFear
+              ) = 0;
+
+  /** Max score decoding */
+  virtual void MaxModel(const AvgWeightVector& wv, std::vector<ValType>* stats) = 0;
+
+  /** Calculate bleu on training set */
+  ValType Evaluate(const AvgWeightVector& wv);
+};
+
+
+/** Gets hope-fear from nbest lists */
+class NbestHopeFearDecoder : public virtual HopeFearDecoder {
+public:
+  NbestHopeFearDecoder(const std::vector<std::string>& featureFiles,
+                         const std::vector<std::string>&  scoreFiles,
+                         bool streaming,
+                         bool  no_shuffle,
+                         bool safe_hope
+                         );
+
+  virtual void reset();
+  virtual void next();
+  virtual bool finished();
+
+  virtual void HopeFear(
+              const std::vector<ValType>& backgroundBleu,
+              const MiraWeightVector& wv,
+              HopeFearData* hopeFear
+              );
+
+  virtual void MaxModel(const AvgWeightVector& wv, std::vector<ValType>* stats);
+
+private:
+  boost::scoped_ptr<HypPackEnumerator> train_;
+  bool safe_hope_;
+
+};
+
+
+
+/** Gets hope-fear from hypergraphs */
+class HypergraphHopeFearDecoder : public virtual HopeFearDecoder {
+public:
+  HypergraphHopeFearDecoder(
+                            const std::string& hypergraphDir,
+                            const std::vector<std::string>& referenceFiles,
+                            size_t num_dense,
+                            bool streaming,
+                            bool no_shuffle,
+                            bool safe_hope,
+                            size_t hg_pruning,
+                            const MiraWeightVector& wv
+                            );
+
+  virtual void reset();
+  virtual void next();
+  virtual bool finished();
+
+  virtual void HopeFear(
+              const std::vector<ValType>& backgroundBleu,
+              const MiraWeightVector& wv,
+              HopeFearData* hopeFear
+              );
+
+  virtual void MaxModel(const AvgWeightVector& wv, std::vector<ValType>* stats);
+
+private:
+  size_t num_dense_;
+  //maps sentence Id to graph ptr
+  typedef std::map<size_t, boost::shared_ptr<Graph> > GraphColl;
+  GraphColl graphs_;
+  GraphColl::const_iterator graphIter_;
+  ReferenceSet references_;
+  Vocab vocab_;
+};
+
+};
+
+#endif
+
diff --git a/mert/Hypergraph.cpp b/mert/Hypergraph.cpp
new file mode 100644
index 0000000000..d80a5c12cc
--- /dev/null
+++ b/mert/Hypergraph.cpp
@@ -0,0 +1,286 @@
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2014- University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+#include <iostream>
+#include <set>
+
+#include <boost/lexical_cast.hpp>
+
+#include "util/double-conversion/double-conversion.h"
+#include "util/string_piece.hh"
+#include "util/tokenize_piece.hh"
+
+#include "Hypergraph.h"
+
+using namespace std;
+static const string kBOS = "<s>";
+static const string kEOS = "</s>";
+
+namespace MosesTuning {
+
+StringPiece NextLine(util::FilePiece& from) { //next line not starting with '#'
+  StringPiece candidate = from.ReadLine();
+  while (candidate.starts_with("#")) candidate = from.ReadLine();
+  return candidate;
+}
+
+Vocab::Vocab() :  eos_( FindOrAdd(kEOS)), bos_(FindOrAdd(kBOS)){
+}
+
+const Vocab::Entry &Vocab::FindOrAdd(const StringPiece &str) {
+#if BOOST_VERSION >= 104200
+  Map::const_iterator i= map_.find(str, Hash(), Equals());
+#else
+  std::string copied_str(str.data(), str.size());
+  Map::const_iterator i = map_.find(copied_str.c_str());
+#endif
+  if (i != map_.end()) return *i;
+  char *copied = static_cast<char*>(piece_backing_.Allocate(str.size() + 1));
+  memcpy(copied, str.data(), str.size());
+  copied[str.size()] = 0;
+  return *map_.insert(Entry(copied, map_.size())).first;
+}
+
+double_conversion::StringToDoubleConverter converter(double_conversion::StringToDoubleConverter::NO_FLAGS, NAN, NAN, "inf", "nan");
+
+
+/**
+ * Reads an incoming edge. Returns edge and source words covered.
+**/
+static pair<Edge*,size_t> ReadEdge(util::FilePiece &from, Graph &graph) {
+  Edge* edge = graph.NewEdge();
+  StringPiece line = NextLine(from);
+  util::TokenIter<util::MultiCharacter> pipes(line, util::MultiCharacter(" ||| "));
+  //Target
+  for (util::TokenIter<util::SingleCharacter, true> i(*pipes, util::SingleCharacter(' ')); i; ++i) {
+    StringPiece got = *i;
+    if ('[' == *got.data() && ']' == got.data()[got.size() - 1]) {
+      // non-terminal
+      char *end_ptr;
+      unsigned long int child = std::strtoul(got.data() + 1, &end_ptr, 10);
+      UTIL_THROW_IF(end_ptr != got.data() + got.size() - 1, HypergraphException, "Bad non-terminal" << got);
+      UTIL_THROW_IF(child >= graph.VertexSize(), HypergraphException, "Reference to vertex " << child << " but we only have " << graph.VertexSize() << " vertices.  Is the file in bottom-up format?");
+      edge->AddWord(NULL);
+      edge->AddChild(child);
+    } else {
+      const Vocab::Entry &found = graph.MutableVocab().FindOrAdd(got);
+      edge->AddWord(&found);
+    }
+  }
+ 
+  //Features
+  ++pipes;
+  for (util::TokenIter<util::SingleCharacter, true> i(*pipes, util::SingleCharacter(' ')); i; ++i) {
+    StringPiece fv = *i;
+    if (!fv.size()) break;
+    size_t equals = fv.find_last_of("=");
+    UTIL_THROW_IF(equals == fv.npos, HypergraphException, "Failed to parse feature '" << fv << "'");
+    StringPiece name = fv.substr(0,equals);
+    StringPiece value = fv.substr(equals+1);
+    int processed;
+    float score = converter.StringToFloat(value.data(), value.length(), &processed);
+    UTIL_THROW_IF(isnan(score), HypergraphException, "Failed to parse weight '" << value << "'");
+    edge->AddFeature(name,score);
+  }
+  //Covered words
+  ++pipes;
+  size_t sourceCovered = boost::lexical_cast<size_t>(*pipes);
+  return pair<Edge*,size_t>(edge,sourceCovered); 
+}
+
+void Graph::Prune(Graph* pNewGraph, const SparseVector& weights, size_t minEdgeCount) const {
+
+  Graph& newGraph = *pNewGraph;
+  //TODO: Optimise case where no pruning required
+
+  //For debug
+  
+  /*
+  map<const Edge*, string> edgeIds;
+  for (size_t i = 0; i < edges_.Size(); ++i) {
+    stringstream str;
+    for (size_t j = 0; j < edges_[i].Words().size(); ++j) {
+      if (edges_[i].Words()[j]) str << edges_[i].Words()[j]->first;
+    }
+    edgeIds[&(edges_[i])] = str.str();
+  }*/
+
+  //end For debug
+
+  map<const Edge*, FeatureStatsType> edgeBackwardScores;
+  map<const Edge*, size_t> edgeHeads;
+  vector<FeatureStatsType> vertexBackwardScores(vertices_.Size(), kMinScore);
+  vector<vector<const Edge*> > outgoing(vertices_.Size());
+
+  //Compute backward scores
+  for (size_t vi = 0; vi < vertices_.Size(); ++vi) {
+    //cerr << "Vertex " << vi << endl;
+    const Vertex& vertex = vertices_[vi];
+    const vector<const Edge*>& incoming = vertex.GetIncoming();
+    if (!incoming.size()) {
+      vertexBackwardScores[vi] = 0;
+    } else {
+      for (size_t ei = 0; ei < incoming.size(); ++ei) {
+        //cerr << "Edge " << edgeIds[incoming[ei]] << endl;
+        edgeHeads[incoming[ei]]= vi;
+        FeatureStatsType incomingScore = incoming[ei]->GetScore(weights);
+        for (size_t i = 0; i < incoming[ei]->Children().size(); ++i) {
+       //   cerr << "\tChild " << incoming[ei]->Children()[i] << endl;
+          size_t childId = incoming[ei]->Children()[i];
+          UTIL_THROW_IF(vertexBackwardScores[childId] == kMinScore,
+            HypergraphException, "Graph was not topologically sorted. curr=" << vi << " prev=" << childId);
+          outgoing[childId].push_back(incoming[ei]);
+          incomingScore += vertexBackwardScores[childId];
+        }
+        edgeBackwardScores[incoming[ei]]= incomingScore;
+     //   cerr << "Backward score: " << incomingScore << endl;
+        if (incomingScore > vertexBackwardScores[vi]) vertexBackwardScores[vi] = incomingScore;
+      }
+    }
+  }
+
+  //Compute forward scores
+  vector<FeatureStatsType> vertexForwardScores(vertices_.Size(), kMinScore);
+  map<const Edge*, FeatureStatsType> edgeForwardScores;
+  for (size_t i = 1; i <= vertices_.Size(); ++i) {
+    size_t vi = vertices_.Size() - i;
+    //cerr << "Vertex " << vi << endl;
+    if (!outgoing[vi].size()) {
+      vertexForwardScores[vi] = 0;
+    } else {
+      for (size_t ei = 0; ei < outgoing[vi].size(); ++ei) {
+        //cerr << "Edge " << edgeIds[outgoing[vi][ei]] << endl;
+        FeatureStatsType outgoingScore = 0; 
+        //sum scores of siblings
+        for (size_t i = 0; i < outgoing[vi][ei]->Children().size(); ++i) {
+          size_t siblingId = outgoing[vi][ei]->Children()[i];
+          if (siblingId != vi) {
+         //   cerr << "\tSibling " << siblingId << endl;
+            outgoingScore += vertexBackwardScores[siblingId];
+          }
+        }
+        //add score of head
+        outgoingScore += vertexForwardScores[edgeHeads[outgoing[vi][ei]]];
+        //cerr << "Forward score " << outgoingScore << endl;
+        edgeForwardScores[outgoing[vi][ei]] = outgoingScore;
+        outgoingScore += outgoing[vi][ei]->GetScore(weights);
+        if (outgoingScore > vertexForwardScores[vi]) vertexForwardScores[vi] = outgoingScore;
+      }
+    }
+  }
+
+
+
+  multimap<FeatureStatsType, const Edge*> edgeScores;
+  for (size_t i = 0; i < edges_.Size(); ++i) {
+    const Edge* edge = &(edges_[i]);
+    if (edgeForwardScores.find(edge) == edgeForwardScores.end()) {
+      //This edge has no children, so didn't get a forward score. Its forward score
+      //is that of its head
+      edgeForwardScores[edge] = vertexForwardScores[edgeHeads[edge]];
+    }
+    FeatureStatsType score = edgeForwardScores[edge] + edgeBackwardScores[edge];
+    edgeScores.insert(pair<FeatureStatsType, const Edge*>(score,edge));
+  //  cerr << edgeIds[edge] << " " << score << endl;
+  }
+
+
+  
+  multimap<FeatureStatsType, const Edge*>::const_reverse_iterator ei = edgeScores.rbegin();
+  size_t edgeCount = 1;
+  while(edgeCount < minEdgeCount && ei != edgeScores.rend()) {
+    ++ei;
+    ++edgeCount;
+  }
+  multimap<FeatureStatsType, const Edge*>::const_iterator lowest = edgeScores.begin();
+  if (ei != edgeScores.rend())  lowest = edgeScores.lower_bound(ei->first);
+
+  //cerr << "Retained edges" << endl;
+  set<size_t> retainedVertices;
+  set<const Edge*> retainedEdges;
+  for (; lowest != edgeScores.end(); ++lowest) {
+    //cerr << lowest->first << " " << edgeIds[lowest->second] << endl;
+    retainedEdges.insert(lowest->second);
+    retainedVertices.insert(edgeHeads[lowest->second]);
+    for (size_t i = 0; i < lowest->second->Children().size(); ++i) {
+      retainedVertices.insert(lowest->second->Children()[i]);
+    }
+  }
+  newGraph.SetCounts(retainedVertices.size(), retainedEdges.size());
+
+  //cerr << "Retained vertices" << endl;
+  map<size_t,size_t> oldIdToNew;
+  size_t vi = 0;
+  for (set<size_t>::const_iterator i = retainedVertices.begin(); i != retainedVertices.end(); ++i, ++vi) {
+    //cerr << *i << endl;
+    oldIdToNew[*i] = vi;
+    Vertex* vertex = newGraph.NewVertex();
+    vertex->SetSourceCovered(vertices_[*i].SourceCovered()); 
+  }
+
+  for (set<const Edge*>::const_iterator i = retainedEdges.begin(); i != retainedEdges.end(); ++i) {
+    Edge* newEdge = newGraph.NewEdge();
+    const Edge* oldEdge = *i;
+    for (size_t j = 0; j < oldEdge->Words().size(); ++j) {
+      newEdge->AddWord(oldEdge->Words()[j]);
+    }
+    for (size_t j = 0; j < oldEdge->Children().size(); ++j) {
+      newEdge->AddChild(oldIdToNew[oldEdge->Children()[j]]);
+    }
+    newEdge->SetFeatures(oldEdge->Features());
+    Vertex& newHead = newGraph.vertices_[oldIdToNew[edgeHeads[oldEdge]]];
+    newHead.AddEdge(newEdge);
+  }
+
+  
+
+
+}
+
+/**
+  * Read from "Kenneth's hypergraph" aka cdec target_graph format (with comments)
+**/
+void ReadGraph(util::FilePiece &from, Graph &graph) {
+
+  //First line should contain field names
+  StringPiece line = from.ReadLine();
+  UTIL_THROW_IF(line.compare("# target ||| features ||| source-covered") != 0, HypergraphException, "Incorrect format spec on first line: '" << line << "'");
+  line = NextLine(from);
+  
+  //Then expect numbers of vertices
+  util::TokenIter<util::SingleCharacter, false> i(line, util::SingleCharacter(' '));
+  unsigned long int vertices = boost::lexical_cast<unsigned long int>(*i);
+  ++i;
+  unsigned long int edges = boost::lexical_cast<unsigned long int>(*i);
+  graph.SetCounts(vertices, edges);
+  //cerr << "vertices: " << vertices << "; edges: " << edges << endl;
+  for (size_t i = 0; i < vertices; ++i) {
+    line = NextLine(from);
+    unsigned long int edge_count = boost::lexical_cast<unsigned long int>(line);
+    Vertex* vertex = graph.NewVertex();
+    for (unsigned long int e = 0; e < edge_count; ++e) {
+      pair<Edge*,size_t> edge = ReadEdge(from, graph);
+      vertex->AddEdge(edge.first);
+      //Note: the file format attaches this to the edge, but it's really a property 
+      //of the vertex.
+      if (!e) {vertex->SetSourceCovered(edge.second);}
+    }
+  }
+}
+
+};
diff --git a/mert/Hypergraph.h b/mert/Hypergraph.h
new file mode 100644
index 0000000000..b6ee6c3f87
--- /dev/null
+++ b/mert/Hypergraph.h
@@ -0,0 +1,251 @@
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2014- University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#ifndef MERT_HYPERGRAPH_H
+#define MERT_HYPERGRAPH_H
+
+#include <string>
+
+#include <boost/noncopyable.hpp>
+#include <boost/scoped_array.hpp>
+#include <boost/shared_ptr.hpp>
+#include <boost/functional/hash/hash.hpp>
+#include <boost/unordered_map.hpp>
+
+
+#include "util/exception.hh"
+#include "util/file_piece.hh"
+#include "util/murmur_hash.hh"
+#include "util/pool.hh"
+#include "util/string_piece.hh"
+
+#include "FeatureStats.h"
+
+namespace MosesTuning {
+
+typedef unsigned int WordIndex;
+const WordIndex kMaxWordIndex = UINT_MAX;
+const FeatureStatsType kMinScore = -std::numeric_limits<FeatureStatsType>::max();
+
+//Fixed-capacity pool: Init() allocates an array of T once, New() hands out
+//its elements in order. Elements are never returned individually; the whole
+//array is released when the allocator is destroyed.
+template <class T> class FixedAllocator : boost::noncopyable {
+  public:
+    FixedAllocator() : current_(NULL), end_(NULL) {}
+
+    //Must be called (once) before New(); count is the capacity
+    void Init(std::size_t count) {
+      assert(!current_);
+      array_.reset(new T[count]);
+      current_ = array_.get();
+      end_ = current_ + count;
+    }
+
+    T &operator[](std::size_t idx) {
+      return array_.get()[idx];
+    }
+    const T &operator[](std::size_t idx) const {
+      return array_.get()[idx];
+    }
+
+    //Returns the next unused element; throws util::Exception when exhausted
+    T *New() {
+      //Check before advancing, so a failed call leaves Size() unchanged
+      UTIL_THROW_IF(current_ >= end_, util::Exception, "Allocating past end");
+      return current_++;
+    }
+
+    std::size_t Capacity() const { return end_ - array_.get(); }
+    std::size_t Size() const { return current_ - array_.get(); }
+
+  private:
+    boost::scoped_array<T> array_;
+    T *current_, *end_;
+};
+
+
+class Vocab {
+  public:
+    Vocab();
+
+    typedef std::pair<const char *const, WordIndex> Entry;
+
+    const Entry &FindOrAdd(const StringPiece &str);
+
+    const Entry& Bos() const {return bos_;}
+
+    const Entry& Eos() const {return eos_;}
+
+  private:
+    util::Pool piece_backing_;
+
+    struct Hash : public std::unary_function<const char *, std::size_t> {
+      std::size_t operator()(StringPiece str) const {
+        return util::MurmurHashNative(str.data(), str.size());
+      }
+    };
+
+    struct Equals : public std::binary_function<const char *, const char *, bool> {
+      bool operator()(StringPiece first, StringPiece second) const {
+        return first == second;
+      }
+    };
+
+    typedef boost::unordered_map<const char *, WordIndex, Hash, Equals> Map;
+    Map map_;
+    Entry eos_;
+    Entry bos_;
+
+};
+
+typedef std::vector<const Vocab::Entry*> WordVec;
+
+class Vertex;
+
+//Use shared pointer to save copying when we prune
+typedef boost::shared_ptr<SparseVector> FeaturePtr;
+
+/**
+ * An edge has 1 head vertex, 0..n child (tail) vertices, a list of words and a feature vector.
+**/
+class Edge {
+  public:
+    Edge() {features_.reset(new SparseVector());}
+
+    void AddWord(const Vocab::Entry *word) {
+      words_.push_back(word);
+    }
+
+    void AddChild(size_t child) {
+      children_.push_back(child);
+    }
+
+    void AddFeature(const StringPiece& name, FeatureStatsType value) {
+      //TODO StringPiece interface
+      features_->set(name.as_string(),value);
+    }
+
+
+    const WordVec &Words() const {
+      return words_;
+    }
+    
+    const FeaturePtr& Features() const {
+      return features_;
+    }
+
+    void SetFeatures(const FeaturePtr& features) {
+      features_ = features;
+    }
+
+    const std::vector<size_t>& Children() const {
+      return children_;
+    }
+
+    FeatureStatsType GetScore(const SparseVector& weights) const {
+      return inner_product(*(features_.get()), weights);
+    }
+
+  private:
+    // NULL for non-terminals.  
+    std::vector<const Vocab::Entry*> words_;
+    std::vector<size_t> children_;
+    boost::shared_ptr<SparseVector> features_;
+};
+
+/*
+ * A vertex has 0..n incoming edges
+ **/
+class Vertex {
+  public:
+    Vertex() : sourceCovered_(0) {}
+
+    void AddEdge(const Edge* edge) {incoming_.push_back(edge);}
+
+    void SetSourceCovered(size_t sourceCovered) {sourceCovered_ = sourceCovered;}
+
+    const std::vector<const Edge*>& GetIncoming() const {return incoming_;}
+
+    size_t SourceCovered() const {return sourceCovered_;}
+
+  private:
+    std::vector<const Edge*> incoming_;
+    size_t sourceCovered_;
+};
+
+
+class Graph : boost::noncopyable {
+  public:
+    Graph(Vocab& vocab) : vocab_(vocab) {}
+
+    void SetCounts(std::size_t vertices, std::size_t edges) {
+      vertices_.Init(vertices);
+      edges_.Init(edges);
+    }
+
+    Vocab &MutableVocab() { return vocab_; }
+
+    Edge *NewEdge() {      
+      return edges_.New();
+    }
+
+    Vertex *NewVertex() {
+      return vertices_.New();
+    }
+
+    const Vertex &GetVertex(std::size_t index) const {
+      return vertices_[index];
+    }
+
+    Edge &GetEdge(std::size_t index) {
+      return edges_[index];
+    }
+
+    /* Create a pruned copy of this graph in newGraph, keeping the (at least)
+    minEdgeCount best edges. Edges are ranked by their scores in the
+    max-product semiring, as suggested by Colin Cherry */
+    void Prune(Graph* newGraph, const SparseVector& weights, size_t minEdgeCount) const;
+
+    std::size_t VertexSize() const { return vertices_.Size(); }
+    std::size_t EdgeSize() const { return edges_.Size(); }
+
+    bool IsBoundary(const Vocab::Entry* word) const {
+      return word->second == vocab_.Bos().second || word->second == vocab_.Eos().second;
+    }
+
+  private:
+    FixedAllocator<Edge> edges_;    
+    FixedAllocator<Vertex> vertices_;
+    Vocab& vocab_;
+};
+
+class HypergraphException : public util::Exception {
+  public:
+    HypergraphException() {}
+    ~HypergraphException() throw() {}
+};
+
+
+void ReadGraph(util::FilePiece &from, Graph &graph);
+
+
+};
+
+#endif
diff --git a/mert/HypergraphTest.cpp b/mert/HypergraphTest.cpp
new file mode 100644
index 0000000000..345a445f05
--- /dev/null
+++ b/mert/HypergraphTest.cpp
@@ -0,0 +1,151 @@
+#include <iostream>
+
+#define BOOST_TEST_MODULE MertForestRescore
+#include <boost/test/unit_test.hpp>
+
+#include "Hypergraph.h"
+
+using namespace std;
+using namespace MosesTuning;
+
+BOOST_AUTO_TEST_CASE(prune) 
+{
+  Vocab vocab;
+  WordVec words;
+  string wordStrings[] =
+    {"<s>", "</s>", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"};
+  for (size_t i = 0; i < 13; ++i) {
+    words.push_back(&(vocab.FindOrAdd((wordStrings[i]))));
+  }
+
+  const string f1 = "foo";
+  const string f2 = "bar";
+  Graph graph(vocab);
+  graph.SetCounts(5,8);
+
+  Edge* e0 = graph.NewEdge();
+  e0->AddWord(words[0]);
+
+  Vertex* v0 = graph.NewVertex();
+  v0->AddEdge(e0);
+
+  Edge* e1 = graph.NewEdge();
+  e1->AddWord(NULL);
+  e1->AddChild(0);
+  e1->AddWord(words[2]);
+  e1->AddWord(words[3]);
+  e1->AddFeature(f1,1);
+  e1->AddFeature(f2,1);
+  Edge* e5 = graph.NewEdge();
+  e5->AddWord(NULL);
+  e5->AddChild(0);
+  e5->AddWord(words[9]);
+  e5->AddWord(words[10]);
+  e5->AddFeature(f1,2);
+  e5->AddFeature(f2,-2);
+
+  Vertex* v1 = graph.NewVertex();
+  v1->AddEdge(e1);
+  v1->AddEdge(e5);
+  v1->SetSourceCovered(1);
+
+  Edge* e2 = graph.NewEdge();
+  e2->AddWord(NULL);
+  e2->AddChild(1);
+  e2->AddWord(words[4]);
+  e2->AddWord(words[5]);
+  e2->AddFeature(f2,3);
+
+  Vertex* v2 = graph.NewVertex();
+  v2->AddEdge(e2);
+  v2->SetSourceCovered(3);
+
+  Edge* e3 = graph.NewEdge();
+  e3->AddWord(NULL);
+  e3->AddChild(2);
+  e3->AddWord(words[6]);
+  e3->AddWord(words[7]);
+  e3->AddWord(words[8]);
+  e3->AddFeature(f1,1);
+  Edge* e6 = graph.NewEdge();
+  e6->AddWord(NULL);
+  e6->AddChild(2);
+  e6->AddWord(words[9]);
+  e6->AddWord(words[12]);
+  e6->AddFeature(f2,1);
+  Edge* e7 = graph.NewEdge();
+  e7->AddWord(NULL);
+  e7->AddChild(1);
+  e7->AddWord(words[11]);
+  e7->AddWord(words[12]);
+  e7->AddFeature(f1,2);
+  e7->AddFeature(f2,3);
+
+  Vertex* v3 = graph.NewVertex();
+  v3->AddEdge(e3);
+  v3->AddEdge(e6);
+  v3->AddEdge(e7);
+  v3->SetSourceCovered(5);
+
+  Edge* e4 = graph.NewEdge();
+  e4->AddWord(NULL);
+  e4->AddChild(3);
+  e4->AddWord(words[1]);
+
+  Vertex* v4 = graph.NewVertex();
+  v4->AddEdge(e4);
+  v4->SetSourceCovered(6);
+
+  SparseVector weights;
+  weights.set(f1,2);
+  weights.set(f2,1);
+
+  Graph pruned(vocab);
+  graph.Prune(&pruned, weights, 5);
+
+  BOOST_CHECK_EQUAL(5, pruned.EdgeSize());
+  BOOST_CHECK_EQUAL(4, pruned.VertexSize());
+  
+  //edges retained should be best path (<s> ab jk </s>) and hi
+  BOOST_CHECK_EQUAL(1, pruned.GetVertex(0).GetIncoming().size());
+  BOOST_CHECK_EQUAL(2, pruned.GetVertex(1).GetIncoming().size());
+  BOOST_CHECK_EQUAL(1, pruned.GetVertex(2).GetIncoming().size());
+  BOOST_CHECK_EQUAL(1, pruned.GetVertex(3).GetIncoming().size());
+
+  const Edge* edge;
+
+  edge =  pruned.GetVertex(0).GetIncoming()[0];
+  BOOST_CHECK_EQUAL(1, edge->Words().size()); 
+  BOOST_CHECK_EQUAL(words[0], edge->Words()[0]); 
+
+  edge =  pruned.GetVertex(1).GetIncoming()[0];
+  BOOST_CHECK_EQUAL(3, edge->Words().size()); 
+  BOOST_CHECK_EQUAL((Vocab::Entry*)NULL, edge->Words()[0]); 
+  BOOST_CHECK_EQUAL(words[2]->first, edge->Words()[1]->first); 
+  BOOST_CHECK_EQUAL(words[3]->first, edge->Words()[2]->first); 
+
+  edge =  pruned.GetVertex(1).GetIncoming()[1];
+  BOOST_CHECK_EQUAL(3, edge->Words().size());
+  BOOST_CHECK_EQUAL((Vocab::Entry*)NULL, edge->Words()[0]);
+  BOOST_CHECK_EQUAL(words[9]->first, edge->Words()[1]->first); 
+  BOOST_CHECK_EQUAL(words[10]->first, edge->Words()[2]->first);
+
+  edge =  pruned.GetVertex(2).GetIncoming()[0];
+  BOOST_CHECK_EQUAL(3, edge->Words().size());
+  BOOST_CHECK_EQUAL((Vocab::Entry*)NULL, edge->Words()[0]);
+  BOOST_CHECK_EQUAL(words[11]->first, edge->Words()[1]->first); 
+  BOOST_CHECK_EQUAL(words[12]->first, edge->Words()[2]->first);
+
+  edge =  pruned.GetVertex(3).GetIncoming()[0];
+  BOOST_CHECK_EQUAL(2, edge->Words().size());
+  BOOST_CHECK_EQUAL((Vocab::Entry*)NULL, edge->Words()[0]);
+  BOOST_CHECK_EQUAL(words[1]->first, edge->Words()[1]->first); 
+
+
+
+  
+
+//  BOOST_CHECK_EQUAL(words[0], pruned.GetVertex(0).GetIncoming()[0].Words()[0]);   
+  
+
+}
diff --git a/mert/Jamfile b/mert/Jamfile
index 34c640b06f..d848c258fa 100644
--- a/mert/Jamfile
+++ b/mert/Jamfile
@@ -15,6 +15,9 @@ FeatureStats.cpp
 FeatureArray.cpp
 FeatureData.cpp
 FeatureDataIterator.cpp
+ForestRescore.cpp
+HopeFearDecoder.cpp
+Hypergraph.cpp
 MiraFeatureVector.cpp
 MiraWeightVector.cpp
 HypPackEnumerator.cpp
@@ -62,13 +65,15 @@ exe sentence-bleu : sentence-bleu.cpp mert_lib ;
 
 exe pro : pro.cpp mert_lib ..//boost_program_options ;
 
-exe kbmira : kbmira.cpp mert_lib ..//boost_program_options ;
+exe kbmira : kbmira.cpp mert_lib ..//boost_program_options ..//boost_filesystem ;
 
 alias programs : mert extractor evaluator pro kbmira sentence-bleu ;
 
 unit-test bleu_scorer_test : BleuScorerTest.cpp mert_lib ..//boost_unit_test_framework ;
 unit-test feature_data_test : FeatureDataTest.cpp mert_lib ..//boost_unit_test_framework ;
 unit-test data_test : DataTest.cpp mert_lib ..//boost_unit_test_framework ;
+unit-test forest_rescore_test : ForestRescoreTest.cpp mert_lib ..//boost_unit_test_framework ;
+unit-test hypergraph_test : HypergraphTest.cpp mert_lib ..//boost_unit_test_framework ;
 unit-test ngram_test : NgramTest.cpp mert_lib ..//boost_unit_test_framework ;
 unit-test optimizer_factory_test : OptimizerFactoryTest.cpp mert_lib ..//boost_unit_test_framework ;
 unit-test point_test : PointTest.cpp mert_lib ..//boost_unit_test_framework ;
diff --git a/mert/MiraFeatureVector.cpp b/mert/MiraFeatureVector.cpp
index dea9b9b838..347ad488e0 100644
--- a/mert/MiraFeatureVector.cpp
+++ b/mert/MiraFeatureVector.cpp
@@ -9,18 +9,17 @@ namespace MosesTuning
 {
 
 
-MiraFeatureVector::MiraFeatureVector(const FeatureDataItem& vec)
-  : m_dense(vec.dense)
-{
-  vector<size_t> sparseFeats = vec.sparse.feats();
+void MiraFeatureVector::InitSparse(const SparseVector& sparse, size_t ignoreLimit) {
+  vector<size_t> sparseFeats = sparse.feats();
   bool bFirst = true;
   size_t lastFeat = 0;
   m_sparseFeats.reserve(sparseFeats.size());
   m_sparseVals.reserve(sparseFeats.size());
   for(size_t i=0; i<sparseFeats.size(); i++) {
+    if (sparseFeats[i] < ignoreLimit) continue;
     size_t feat = m_dense.size() + sparseFeats[i];
     m_sparseFeats.push_back(feat);
-    m_sparseVals.push_back(vec.sparse.get(sparseFeats[i]));
+    m_sparseVals.push_back(sparse.get(sparseFeats[i]));
 
     // Check ordered property
     if(bFirst) {
@@ -35,6 +34,21 @@ MiraFeatureVector::MiraFeatureVector(const FeatureDataItem& vec)
   }
 }
 
+MiraFeatureVector::MiraFeatureVector(const FeatureDataItem& vec)
+  : m_dense(vec.dense)
+{
+  InitSparse(vec.sparse);
+}
+
+MiraFeatureVector::MiraFeatureVector(const SparseVector& sparse, size_t num_dense) {
+  m_dense.resize(num_dense);
+  //Assume that features with id [0,num_dense) are the dense features
+  for (size_t id = 0; id < num_dense; ++id) {
+    m_dense[id] = sparse.get(id);
+  }
+  InitSparse(sparse,num_dense);
+}
+
 MiraFeatureVector::MiraFeatureVector(const MiraFeatureVector& other)
   : m_dense(other.m_dense),
     m_sparseFeats(other.m_sparseFeats),
@@ -148,6 +162,22 @@ MiraFeatureVector operator-(const MiraFeatureVector& a, const MiraFeatureVector&
   return MiraFeatureVector(dense,sparseFeats,sparseVals);
 }
 
+bool operator==(const MiraFeatureVector& a,const MiraFeatureVector& b) {
+  //Approximate equality: corresponding values may differ by at most eps
+  const ValType eps = 1e-8;
+  if (a.m_dense.size() != b.m_dense.size()) return false;
+  for (size_t i = 0; i < a.m_dense.size(); ++i) {
+    if (fabs(a.m_dense[i]-b.m_dense[i]) > eps) return false;
+  }
+  //sparse features must have identical ids and (approximately) equal values
+  if (a.m_sparseFeats.size() != b.m_sparseFeats.size()) return false;
+  for (size_t i = 0; i < a.m_sparseFeats.size(); ++i) {
+    if (a.m_sparseFeats[i] != b.m_sparseFeats[i]) return false;
+    if (fabs(a.m_sparseVals[i]-b.m_sparseVals[i]) > eps) return false;
+  }
+  return true;
+}
+
 ostream& operator<<(ostream& o, const MiraFeatureVector& e)
 {
   for(size_t i=0; i<e.size(); i++) {
diff --git a/mert/MiraFeatureVector.h b/mert/MiraFeatureVector.h
index cb2b1c87d8..48aa496b50 100644
--- a/mert/MiraFeatureVector.h
+++ b/mert/MiraFeatureVector.h
@@ -26,7 +26,10 @@ typedef FeatureStatsType ValType;
 class MiraFeatureVector
 {
 public:
+  MiraFeatureVector() {}
   MiraFeatureVector(const FeatureDataItem& vec);
+  //Assumes that features in sparse with id < num_dense are dense features
+  MiraFeatureVector(const SparseVector& sparse, size_t num_dense);
   MiraFeatureVector(const MiraFeatureVector& other);
   MiraFeatureVector(const std::vector<ValType>& dense,
                     const std::vector<std::size_t>& sparseFeats,
@@ -42,7 +45,12 @@ class MiraFeatureVector
 
   friend std::ostream& operator<<(std::ostream& o, const MiraFeatureVector& e);
 
+  friend bool operator==(const MiraFeatureVector& a,const MiraFeatureVector& b);
+
 private:
+  //Ignore any sparse features with id < ignoreLimit
+  void InitSparse(const SparseVector& sparse, size_t ignoreLimit = 0);
+
   std::vector<ValType> m_dense;
   std::vector<std::size_t> m_sparseFeats;
   std::vector<ValType> m_sparseVals;
diff --git a/mert/MiraWeightVector.cpp b/mert/MiraWeightVector.cpp
index e23804cbf0..3b7b1780c3 100644
--- a/mert/MiraWeightVector.cpp
+++ b/mert/MiraWeightVector.cpp
@@ -93,6 +93,14 @@ void MiraWeightVector::update(size_t index, ValType delta)
   m_lastUpdated[index] = m_numUpdates;
 }
 
+// Copy each weight whose abs() exceeds 1e-8 into *sparse, keyed by its index.
+void MiraWeightVector::ToSparse(SparseVector* sparse) const {
+  for (size_t idx = 0; idx < m_weights.size(); ++idx) {
+    if (!(abs(m_weights[idx])>1e-8)) continue;
+    sparse->set(idx,m_weights[idx]);
+  }
+}
+
 /**
  * Make sure everyone's total is up-to-date
  */
@@ -163,6 +171,15 @@ size_t AvgWeightVector::size() const
   return m_wv.m_weights.size();
 }
 
+// Copy each weight(i) value whose abs() exceeds 1e-8 into *sparse, keyed by index.
+void AvgWeightVector::ToSparse(SparseVector* sparse) const {
+  for (size_t idx = 0; idx < size(); ++idx) {
+    ValType value = weight(idx);
+    if (!(abs(value)>1e-8)) continue;
+    sparse->set(idx,value);
+  }
+}
+
 // --Emacs trickery--
 // Local Variables:
 // mode:c++
diff --git a/mert/MiraWeightVector.h b/mert/MiraWeightVector.h
index eb27e8a6de..bbc28704ba 100644
--- a/mert/MiraWeightVector.h
+++ b/mert/MiraWeightVector.h
@@ -63,6 +63,11 @@ class MiraWeightVector
    */
   AvgWeightVector avg();
 
+  /**
+    * Convert to sparse vector, interpreting all features as sparse.
+   **/
+  void ToSparse(SparseVector* sparse) const;
+
   friend class AvgWeightVector;
 
   friend std::ostream& operator<<(std::ostream& o, const MiraWeightVector& e);
@@ -99,12 +104,12 @@ class AvgWeightVector
   ValType score(const MiraFeatureVector& fv) const;
   ValType weight(std::size_t index) const;
   std::size_t size() const;
+  void ToSparse(SparseVector* sparse) const;
 private:
   const MiraWeightVector& m_wv;
 };
 
 
-#endif // MERT_WEIGHT_VECTOR_H
 
 // --Emacs trickery--
 // Local Variables:
@@ -113,3 +118,4 @@ class AvgWeightVector
 // End:
 
 }
+#endif // MERT_WEIGHT_VECTOR_H
diff --git a/mert/kbmira.cpp b/mert/kbmira.cpp
index a2665ac131..da552fa364 100644
--- a/mert/kbmira.cpp
+++ b/mert/kbmira.cpp
@@ -39,8 +39,10 @@ de recherches du Canada
 #include <boost/program_options.hpp>
 #include <boost/scoped_ptr.hpp>
 
+#include "util/exception.hh"
+
 #include "BleuScorer.h"
-#include "HypPackEnumerator.h"
+#include "HopeFearDecoder.h"
 #include "MiraFeatureVector.h"
 #include "MiraWeightVector.h"
 
@@ -49,38 +51,16 @@ using namespace MosesTuning;
 
 namespace po = boost::program_options;
 
-ValType evaluate(HypPackEnumerator* train, const AvgWeightVector& wv)
-{
-  vector<ValType> stats(kBleuNgramOrder*2+1,0);
-  for(train->reset(); !train->finished(); train->next()) {
-    // Find max model
-    size_t max_index=0;
-    ValType max_score=0;
-    for(size_t i=0; i<train->cur_size(); i++) {
-      MiraFeatureVector vec(train->featuresAt(i));
-      ValType score = wv.score(vec);
-      if(i==0 || score > max_score) {
-        max_index = i;
-        max_score = score;
-      }
-    }
-    // Update stats
-    const vector<float>& sent = train->scoresAt(max_index);
-    for(size_t i=0; i<sent.size(); i++) {
-      stats[i]+=sent[i];
-    }
-  }
-  return unsmoothedBleu(stats);
-}
-
 int main(int argc, char** argv)
 {
-  const ValType BLEU_RATIO = 5;
   bool help;
   string denseInitFile;
   string sparseInitFile;
+  string type = "nbest";
   vector<string> scoreFiles;
   vector<string> featureFiles;
+  vector<string> referenceFiles; //for hg mira
+  string hgDir;
   int seed;
   string outputFile;
   float c = 0.01;      // Step-size cap C
@@ -91,25 +71,30 @@ int main(int argc, char** argv)
   bool model_bg = false; // Use model for background corpus
   bool verbose = false; // Verbose updates
   bool safe_hope = false; // Model score cannot have more than BLEU_RATIO times more influence than BLEU
+  size_t hgPruning = 50; //prune hypergraphs to have this many edges per reference word 
 
   // Command-line processing follows pro.cpp
   po::options_description desc("Allowed options");
   desc.add_options()
   ("help,h", po::value(&help)->zero_tokens()->default_value(false), "Print this help message and exit")
+  ("type,t", po::value<string>(&type), "Either nbest or hypergraph")
   ("scfile,S", po::value<vector<string> >(&scoreFiles), "Scorer data files")
   ("ffile,F", po::value<vector<string> > (&featureFiles), "Feature data files")
+  ("hgdir,H", po::value<string> (&hgDir), "Directory containing hypergraphs")
+  ("reference,R", po::value<vector<string> > (&referenceFiles), "Reference files, only required for hypergraph mira")
   ("random-seed,r", po::value<int>(&seed), "Seed for random number generation")
   ("output-file,o", po::value<string>(&outputFile), "Output file")
   ("cparam,C", po::value<float>(&c), "MIRA C-parameter, lower for more regularization (default 0.01)")
   ("decay,D", po::value<float>(&decay), "BLEU background corpus decay rate (default 0.999)")
   ("iters,J", po::value<int>(&n_iters), "Number of MIRA iterations to run (default 60)")
-  ("dense-init,d", po::value<string>(&denseInitFile), "Weight file for dense features")
+  ("dense-init,d", po::value<string>(&denseInitFile), "Weight file for dense features. This should have 'name= value' on each line, or (legacy) should be the Moses mert 'init.opt' format.")
   ("sparse-init,s", po::value<string>(&sparseInitFile), "Weight file for sparse features")
   ("streaming", po::value(&streaming)->zero_tokens()->default_value(false), "Stream n-best lists to save memory, implies --no-shuffle")
   ("no-shuffle", po::value(&no_shuffle)->zero_tokens()->default_value(false), "Don't shuffle hypotheses before each epoch")
   ("model-bg", po::value(&model_bg)->zero_tokens()->default_value(false), "Use model instead of hope for BLEU background")
   ("verbose", po::value(&verbose)->zero_tokens()->default_value(false), "Verbose updates")
   ("safe-hope", po::value(&safe_hope)->zero_tokens()->default_value(false), "Mode score's influence on hope decoding is limited")
+  ("hg-prune", po::value<size_t>(&hgPruning), "Prune hypergraphs to have this many edges per reference word")
   ;
 
   po::options_description cmdline_options;
@@ -145,12 +130,56 @@ int main(int argc, char** argv)
       cerr << "could not open dense initfile: " << denseInitFile << endl;
       exit(3);
     }
+    if (verbose) cerr << "Reading dense features:" << endl;
     parameter_t val;
     getline(opt,buffer);
-    istringstream strstrm(buffer);
-    while(strstrm >> val) {
-      initParams.push_back(val);
+    if (buffer.find_first_of("=") == buffer.npos) {
+      UTIL_THROW_IF(type == "hypergraph", util::Exception, "For hypergraph version, require dense features in 'name= value' format");
+      cerr << "WARN: dense features in deprecated Moses mert format. Prefer 'name= value' format." << endl;
+      istringstream strstrm(buffer);
+      while(strstrm >> val) {
+        initParams.push_back(val);
+        if(verbose) cerr << val << endl;
+      }
+    } else {
+      vector<string> names;
+      string last_name = "";
+      size_t feature_ctr = 0;
+      do {
+        size_t equals = buffer.find_last_of("=");
+        UTIL_THROW_IF(equals == buffer.npos, util::Exception, "Incorrect format in dense feature file: '"
+          << buffer << "'");
+        string name = buffer.substr(0,equals);
+        names.push_back(name);
+        initParams.push_back(boost::lexical_cast<ValType>(buffer.substr(equals+2)));
+
+        //Names for features with several values need to have their id added
+        if (name != last_name) feature_ctr = 0;
+        last_name = name;
+        if (feature_ctr) {
+          stringstream namestr;
+          namestr << names.back() << feature_ctr;
+          names[names.size()-1] = namestr.str();
+          if (feature_ctr == 1) {
+            stringstream namestr;
+            namestr << names[names.size()-2] << (feature_ctr-1);
+            names[names.size()-2] = namestr.str();
+          }
+        }
+        ++feature_ctr;
+
+      } while(getline(opt,buffer));
+
+
+      //Make sure that SparseVector encodes dense feature names as 0..n-1.
+      for (size_t i = 0; i < names.size(); ++i) {
+        size_t id = SparseVector::encode(names[i]);
+        assert(id == i);     
+        if (verbose) cerr << names[i] << " " << initParams[i] << endl;
+      }
+
     }
+
     opt.close();
   }
   size_t initDenseSize = initParams.size();
@@ -188,82 +217,45 @@ int main(int argc, char** argv)
   }
   bg.push_back(kBleuNgramOrder);
 
+  boost::scoped_ptr<HopeFearDecoder> decoder;
+  if (type == "nbest") {
+    decoder.reset(new NbestHopeFearDecoder(featureFiles, scoreFiles, streaming, no_shuffle, safe_hope));
+  } else if (type == "hypergraph") {
+    decoder.reset(new HypergraphHopeFearDecoder(hgDir, referenceFiles, initDenseSize, streaming, no_shuffle, safe_hope, hgPruning, wv));
+  } else {
+    UTIL_THROW(util::Exception, "Unknown batch mira type: '" << type << "'");
+  }
+
   // Training loop
-  boost::scoped_ptr<HypPackEnumerator> train;
-  if(streaming)
-    train.reset(new StreamingHypPackEnumerator(featureFiles, scoreFiles));
-  else
-    train.reset(new RandomAccessHypPackEnumerator(featureFiles, scoreFiles, no_shuffle));
-  cerr << "Initial BLEU = " << evaluate(train.get(), wv.avg()) << endl;
+  cerr << "Initial BLEU = " << decoder->Evaluate(wv.avg()) << endl;
   ValType bestBleu = 0;
   for(int j=0; j<n_iters; j++) {
     // MIRA train for one epoch
-    int iNumHyps = 0;
     int iNumExamples = 0;
     int iNumUpdates = 0;
     ValType totalLoss = 0.0;
-    for(train->reset(); !train->finished(); train->next()) {
-      // Hope / fear decode
-      ValType hope_scale = 1.0;
-      size_t hope_index=0, fear_index=0, model_index=0;
-      ValType hope_score=0, fear_score=0, model_score=0;
-      int iNumHypsBackup = iNumHyps;
-      for(size_t safe_loop=0; safe_loop<2; safe_loop++) {
-        iNumHyps = iNumHypsBackup;
-        ValType hope_bleu, hope_model;
-        for(size_t i=0; i< train->cur_size(); i++) {
-          const MiraFeatureVector& vec=train->featuresAt(i);
-          ValType score = wv.score(vec);
-          ValType bleu = sentenceLevelBackgroundBleu(train->scoresAt(i),bg);
-          // Hope
-          if(i==0 || (hope_scale*score + bleu) > hope_score) {
-            hope_score = hope_scale*score + bleu;
-            hope_index = i;
-            hope_bleu = bleu;
-            hope_model = score;
-          }
-          // Fear
-          if(i==0 || (score - bleu) > fear_score) {
-            fear_score = score - bleu;
-            fear_index = i;
-          }
-          // Model
-          if(i==0 || score > model_score) {
-            model_score = score;
-            model_index = i;
-          }
-          iNumHyps++;
-        }
-        // Outer loop rescales the contribution of model score to 'hope' in antagonistic cases
-        // where model score is having far more influence than BLEU
-        hope_bleu *= BLEU_RATIO; // We only care about cases where model has MUCH more influence than BLEU
-        if(safe_hope && safe_loop==0 && abs(hope_model)>1e-8 && abs(hope_bleu)/abs(hope_model)<hope_scale)
-          hope_scale = abs(hope_bleu) / abs(hope_model);
-        else break;
-      }
+    size_t sentenceIndex = 0;
+    for(decoder->reset();!decoder->finished(); decoder->next()) {
+      HopeFearData hfd;
+      decoder->HopeFear(bg,wv,&hfd);
+    
       // Update weights
-      if(hope_index!=fear_index) {
+      if (!hfd.hopeFearEqual && hfd.hopeBleu  > hfd.fearBleu) { 
         // Vector difference
-        const MiraFeatureVector& hope=train->featuresAt(hope_index);
-        const MiraFeatureVector& fear=train->featuresAt(fear_index);
-        MiraFeatureVector diff = hope - fear;
+        MiraFeatureVector diff = hfd.hopeFeatures - hfd.fearFeatures;
         // Bleu difference
-        const vector<float>& hope_stats = train->scoresAt(hope_index);
-        ValType hopeBleu = sentenceLevelBackgroundBleu(hope_stats, bg);
-        const vector<float>& fear_stats = train->scoresAt(fear_index);
-        ValType fearBleu = sentenceLevelBackgroundBleu(fear_stats, bg);
-        assert(hopeBleu + 1e-8 >= fearBleu);
-        ValType delta = hopeBleu - fearBleu;
+        //assert(hfd.hopeBleu + 1e-8 >= hfd.fearBleu);
+        ValType delta = hfd.hopeBleu - hfd.fearBleu;
         // Loss and update
         ValType diff_score = wv.score(diff);
         ValType loss = delta - diff_score;
         if(verbose) {
-          cerr << "Updating sent " << train->cur_id() << endl;
+          cerr << "Updating sent " << sentenceIndex << endl;
           cerr << "Wght: " << wv << endl;
-          cerr << "Hope: " << hope << " BLEU:" << hopeBleu << " Score:" << wv.score(hope) << endl;
-          cerr << "Fear: " << fear << " BLEU:" << fearBleu << " Score:" << wv.score(fear) << endl;
+          cerr << "Hope: " << hfd.hopeFeatures << " BLEU:" << hfd.hopeBleu << " Score:" << wv.score(hfd.hopeFeatures) << endl;
+          cerr << "Fear: " << hfd.fearFeatures << " BLEU:" << hfd.fearBleu << " Score:" << wv.score(hfd.fearFeatures) << endl;
           cerr << "Diff: " << diff << " BLEU:" << delta << " Score:" << diff_score << endl;
-          cerr << "Loss: " << loss << " Scale: " << hope_scale << endl;
+          cerr << "Loss: " << loss <<  " Scale: " << 1 << endl;
           cerr << endl;
         }
         if(loss > 0) {
@@ -273,16 +265,16 @@ int main(int argc, char** argv)
           iNumUpdates++;
         }
         // Update BLEU statistics
-        const vector<float>& model_stats = train->scoresAt(model_index);
         for(size_t k=0; k<bg.size(); k++) {
           bg[k]*=decay;
           if(model_bg)
-            bg[k]+=model_stats[k];
+            bg[k]+=hfd.modelStats[k];
           else
-            bg[k]+=hope_stats[k];
+            bg[k]+=hfd.hopeStats[k];
         }
       }
       iNumExamples++;
+      ++sentenceIndex;
     }
     // Training Epoch summary
     cerr << iNumUpdates << "/" << iNumExamples << " updates"
@@ -291,15 +283,16 @@ int main(int argc, char** argv)
 
     // Evaluate current average weights
     AvgWeightVector avg = wv.avg();
-    ValType bleu = evaluate(train.get(), avg);
+    ValType bleu = decoder->Evaluate(avg);
     cerr << ", BLEU = " << bleu << endl;
     if(bleu > bestBleu) {
+      /*
       size_t num_dense = train->num_dense();
       if(initDenseSize>0 && initDenseSize!=num_dense) {
         cerr << "Error: Initial dense feature count and dense feature count from n-best do not match: "
              << initDenseSize << "!=" << num_dense << endl;
         exit(1);
-      }
+      }*/
       // Write to a file
       ostream* out;
       ofstream outFile;
@@ -314,11 +307,11 @@ int main(int argc, char** argv)
         out = &cout;
       }
       for(size_t i=0; i<avg.size(); i++) {
-        if(i<num_dense)
+        if(i<initDenseSize)
           *out << "F" << i << " " << avg.weight(i) << endl;
         else {
           if(abs(avg.weight(i))>1e-8)
-            *out << SparseVector::decode(i-num_dense) << " " << avg.weight(i) << endl;
+            *out << SparseVector::decode(i-initDenseSize) << " " << avg.weight(i) << endl;
         }
       }
       outFile.close();

From 2a611194a26f341c585eaf4e8aa785292450d1c8 Mon Sep 17 00:00:00 2001
From: Barry Haddow <barry.haddow@gmail.com>
Date: Mon, 21 Jul 2014 11:43:37 +0100
Subject: [PATCH 76/84] reinstate new kbmira args

---
 scripts/training/mert-moses.pl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/training/mert-moses.pl b/scripts/training/mert-moses.pl
index d1ac5828aa..dd41538f57 100755
--- a/scripts/training/mert-moses.pl
+++ b/scripts/training/mert-moses.pl
@@ -863,8 +863,8 @@
     $mira_settings .= "$batch_mira_args ";
   }
 
-  $mira_settings .= " --dense-init run$run.$weights_in_file";
-  #$mira_settings .= " --dense-init run$run.dense";
+  #$mira_settings .= " --dense-init run$run.$weights_in_file";
+  $mira_settings .= " --dense-init run$run.dense";
   if (-e "run$run.sparse-weights") {
     $mira_settings .= " --sparse-init run$run.sparse-weights";
   }

From bb0a0925b5a99988d152bd11e9254cf22ed7ab3f Mon Sep 17 00:00:00 2001
From: Barry Haddow <barry.haddow@gmail.com>
Date: Mon, 21 Jul 2014 14:35:55 +0100
Subject: [PATCH 77/84] fd leak

---
 mert/HopeFearDecoder.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/mert/HopeFearDecoder.cpp b/mert/HopeFearDecoder.cpp
index 5f5c599660..ea36874986 100644
--- a/mert/HopeFearDecoder.cpp
+++ b/mert/HopeFearDecoder.cpp
@@ -176,7 +176,10 @@ HypergraphHopeFearDecoder::HypergraphHopeFearDecoder
     if (di->path().filename() == kWeights) continue;
     Graph graph(vocab_);
     size_t id = boost::lexical_cast<size_t>(di->path().stem().string());
-    util::FilePiece file(di->path().string().c_str());
+    //need to manage the fd, as FilePiece doesn't necessarily close it.
+    util::scoped_fd fd(util::OpenReadOrThrow(di->path().string().c_str()));
+    //util::FilePiece file(di->path().string().c_str());
+    util::FilePiece file(fd.get()); 
     ReadGraph(file,graph);
 
     //cerr << "ref length " << references_.Length(id) << endl;

From 1d64bfc51efecfb8e432f051d48eee2fc7d71374 Mon Sep 17 00:00:00 2001
From: Barry Haddow <barry.haddow@gmail.com>
Date: Mon, 21 Jul 2014 17:39:45 +0100
Subject: [PATCH 78/84] Try to prevent double closure of files

---
 mert/HopeFearDecoder.cpp | 3 +--
 util/read_compressed.cc  | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/mert/HopeFearDecoder.cpp b/mert/HopeFearDecoder.cpp
index ea36874986..3a8174d386 100644
--- a/mert/HopeFearDecoder.cpp
+++ b/mert/HopeFearDecoder.cpp
@@ -176,10 +176,9 @@ HypergraphHopeFearDecoder::HypergraphHopeFearDecoder
     if (di->path().filename() == kWeights) continue;
     Graph graph(vocab_);
     size_t id = boost::lexical_cast<size_t>(di->path().stem().string());
-    //need to manage the fd, as FilePiece doesn't necessarily close it.
     util::scoped_fd fd(util::OpenReadOrThrow(di->path().string().c_str()));
     //util::FilePiece file(di->path().string().c_str());
-    util::FilePiece file(fd.get()); 
+    util::FilePiece file(fd.release()); 
     ReadGraph(file,graph);
 
     //cerr << "ref length " << references_.Length(id) << endl;
diff --git a/util/read_compressed.cc b/util/read_compressed.cc
index e1f4cd7e3b..f306e58315 100644
--- a/util/read_compressed.cc
+++ b/util/read_compressed.cc
@@ -374,7 +374,7 @@ ReadBase *ReadFactory(int fd, uint64_t &raw_amount, const void *already_data, co
     header.resize(original + got);
   }
   if (header.empty()) {
-    hold.release();
+    //hold.release();
     return new Complete();
   }
   switch (DetectMagic(&header[0], header.size())) {

From d097e31038f38e682029ff13275881597563be08 Mon Sep 17 00:00:00 2001
From: Ulrich Germann <ugermann@inf.ed.ac.uk>
Date: Tue, 22 Jul 2014 00:28:10 +0100
Subject: [PATCH 79/84] Added how-to for memory-mapped suffix array phrase
 tables.

---
 doc/Mmsapt.howto | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)
 create mode 100644 doc/Mmsapt.howto

diff --git a/doc/Mmsapt.howto b/doc/Mmsapt.howto
new file mode 100644
index 0000000000..6a48fa9c64
--- /dev/null
+++ b/doc/Mmsapt.howto
@@ -0,0 +1,31 @@
+How to use memory-mapped suffix array phrase tables in the moses decoder 
+(phrase-based decoding only)
+
+1. Compile with the bjam switch --with-mm
+
+2. You need 
+   - sentence-aligned text files
+   - the word alignment between these files in symal output format
+
+3. Build binary files
+
+   Let 
+   ${L1} be the extension of the language that you are translating from,
+   ${L2} the extension of the language that you want to translate into, and 
+   ${CORPUS} the name of the word-aligned training corpus
+
+   % zcat ${CORPUS}.${L1}.gz  | mtt-build -i -o /some/path/${CORPUS}.${L1}
+   % zcat ${CORPUS}.${L2}.gz  | mtt-build -i -o /some/path/${CORPUS}.${L2}
+   % zcat ${CORPUS}.${L1}-${L2}.symal.gz | symal2mam /some/path/${CORPUS}.${L1}-${L2}.mam
+   % mmlex-build /some/path/${CORPUS} ${L1} ${L2} -o /some/path/${CORPUS}.${L1}-${L2}.lex -c /some/path/${CORPUS}.${L1}-${L2}.coc
+
+4. Define line in moses.ini
+
+   The best configuration of phrase table features is still under investigation. 
+   For the time being, try this:
+
+   Mmsapt name=PT0 output-factor=0 num-features=9 base=/some/path/${CORPUS} L1=${L1} L2=${L2} pfwd=g pbwd=g smooth=0 sample=1000 workers=1 
+
+   You can increase the number of workers for sampling (a bit faster), 
+   but you'll lose replicability of the translation output. 
+

From 62b476cd453a6a69ba43b150b37209fe1c8dad62 Mon Sep 17 00:00:00 2001
From: Kenneth Heafield <github@kheafield.com>
Date: Tue, 22 Jul 2014 11:43:20 +0800
Subject: [PATCH 80/84] Fix fd leak noticed by Barry Haddow

---
 util/read_compressed.cc | 1 -
 1 file changed, 1 deletion(-)

diff --git a/util/read_compressed.cc b/util/read_compressed.cc
index e1f4cd7e3b..cee98040ba 100644
--- a/util/read_compressed.cc
+++ b/util/read_compressed.cc
@@ -374,7 +374,6 @@ ReadBase *ReadFactory(int fd, uint64_t &raw_amount, const void *already_data, co
     header.resize(original + got);
   }
   if (header.empty()) {
-    hold.release();
     return new Complete();
   }
   switch (DetectMagic(&header[0], header.size())) {

From 5a45fc71eb27c57b0d71d340b35f8f9f2f8970a4 Mon Sep 17 00:00:00 2001
From: Barry Haddow <barry.haddow@gmail.com>
Date: Tue, 22 Jul 2014 09:24:23 +0100
Subject: [PATCH 81/84] Some more debug

---
 mert/ForestRescore.cpp | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/mert/ForestRescore.cpp b/mert/ForestRescore.cpp
index c88b58e4ca..0d8653e4b3 100644
--- a/mert/ForestRescore.cpp
+++ b/mert/ForestRescore.cpp
@@ -367,7 +367,17 @@ void Viterbi(const Graph& graph, const SparseVector& weights, float bleuWeight,
         FeatureStatsType totalScore = incomingScore;
         if (bleuWeight) { 
           FeatureStatsType bleuScore = bleuScorer.Score(*(incoming[ei]), vertex, bleuStats);
-          UTIL_THROW_IF(isnan(bleuScore), util::Exception, "Bleu score undefined, smoothing problem?");
+          if (isnan(bleuScore)) {
+            cerr << "WARN: bleu score undefined" << endl;
+            cerr << "\tVertex id : " << vi << endl;
+            cerr << "\tBleu stats : ";
+            for (size_t i = 0; i < bleuStats.size(); ++i) {
+              cerr << bleuStats[i] << ",";
+            }
+            cerr << endl;
+            bleuScore = 0;
+          }
+          //UTIL_THROW_IF(isnan(bleuScore), util::Exception, "Bleu score undefined, smoothing problem?");
           totalScore += bleuWeight * bleuScore;
         //  cerr << bleuScore << " Total: " << incomingScore << endl << endl;
           //cerr << "is " << incomingScore << " bs " << bleuScore << endl;

From 36919b53a780136bf599374719a557b97c4b7222 Mon Sep 17 00:00:00 2001
From: Philipp Koehn <phi@jhu.edu>
Date: Tue, 22 Jul 2014 10:10:34 -0400
Subject: [PATCH 82/84] example files for memory mapped suffix array phrase
 table by Uli Germann

---
 scripts/ems/example/config.basic        | 5 +++++
 scripts/ems/example/config.factored     | 5 +++++
 scripts/ems/example/config.hierarchical | 5 +++++
 scripts/ems/example/config.syntax       | 5 +++++
 scripts/ems/example/config.toy          | 5 +++++
 5 files changed, 25 insertions(+)

diff --git a/scripts/ems/example/config.basic b/scripts/ems/example/config.basic
index 1db8154f53..8a813777ea 100644
--- a/scripts/ems/example/config.basic
+++ b/scripts/ems/example/config.basic
@@ -371,6 +371,11 @@ alignment-symmetrization-method = grow-diag-final-and
 #mml-before-wa = "-proportion 0.9"
 #mml-after-wa = "-proportion 0.9"
 
+### build memory mapped suffix array phrase table
+# (binarizing the reordering table is a good idea, since filtering makes little sense)
+#mmsapt = "num-features=9 pfwd=g+ pbwd=g+ smooth=0 sample=1000 workers=1"
+#binarize-all = $moses-script-dir/training/binarize-model.perl
+
 ### create a bilingual concordancer for the model
 #
 #biconcor = $moses-bin-dir/biconcor
diff --git a/scripts/ems/example/config.factored b/scripts/ems/example/config.factored
index c3a6b2a856..2faa5de4c0 100644
--- a/scripts/ems/example/config.factored
+++ b/scripts/ems/example/config.factored
@@ -391,6 +391,11 @@ alignment-symmetrization-method = grow-diag-final-and
 #mml-before-wa = "-proportion 0.9"
 #mml-after-wa = "-proportion 0.9"
 
+### build memory mapped suffix array phrase table
+# (binarizing the reordering table is a good idea, since filtering makes little sense)
+#mmsapt = "num-features=9 pfwd=g+ pbwd=g+ smooth=0 sample=1000 workers=1"
+#binarize-all = $moses-script-dir/training/binarize-model.perl
+
 ### create a bilingual concordancer for the model
 #
 #biconcor = $moses-bin-dir/biconcor
diff --git a/scripts/ems/example/config.hierarchical b/scripts/ems/example/config.hierarchical
index 673ad64a90..0494228fff 100644
--- a/scripts/ems/example/config.hierarchical
+++ b/scripts/ems/example/config.hierarchical
@@ -371,6 +371,11 @@ alignment-symmetrization-method = grow-diag-final-and
 #mml-before-wa = "-proportion 0.9"
 #mml-after-wa = "-proportion 0.9"
 
+### build memory mapped suffix array phrase table
+# (binarizing the reordering table is a good idea, since filtering makes little sense)
+#mmsapt = "num-features=9 pfwd=g+ pbwd=g+ smooth=0 sample=1000 workers=1"
+#binarize-all = $moses-script-dir/training/binarize-model.perl
+
 ### create a bilingual concordancer for the model
 #
 #biconcor = $moses-bin-dir/biconcor
diff --git a/scripts/ems/example/config.syntax b/scripts/ems/example/config.syntax
index 7df60f9906..93248c6726 100644
--- a/scripts/ems/example/config.syntax
+++ b/scripts/ems/example/config.syntax
@@ -375,6 +375,11 @@ alignment-symmetrization-method = grow-diag-final-and
 #mml-before-wa = "-proportion 0.9"
 #mml-after-wa = "-proportion 0.9"
 
+### build memory mapped suffix array phrase table
+# (binarizing the reordering table is a good idea, since filtering makes little sense)
+#mmsapt = "num-features=9 pfwd=g+ pbwd=g+ smooth=0 sample=1000 workers=1"
+#binarize-all = $moses-script-dir/training/binarize-model.perl
+
 ### create a bilingual concordancer for the model
 #
 #biconcor = $moses-bin-dir/biconcor
diff --git a/scripts/ems/example/config.toy b/scripts/ems/example/config.toy
index bd328a18ee..a89ea428f8 100644
--- a/scripts/ems/example/config.toy
+++ b/scripts/ems/example/config.toy
@@ -355,6 +355,11 @@ alignment-symmetrization-method = grow-diag-final-and
 #mml-before-wa = "-proportion 0.9"
 #mml-after-wa = "-proportion 0.9"
 
+### build memory mapped suffix array phrase table
+# (binarizing the reordering table is a good idea, since filtering makes little sense)
+#mmsapt = "num-features=9 pfwd=g+ pbwd=g+ smooth=0 sample=1000 workers=1"
+#binarize-all = $moses-script-dir/training/binarize-model.perl
+
 ### create a bilingual concordancer for the model
 #
 #biconcor = $moses-bin-dir/biconcor

From 55ae15a6f89c76a3b4f5d671b8dda4cbe56eb946 Mon Sep 17 00:00:00 2001
From: Philipp Koehn <phi@jhu.edu>
Date: Tue, 22 Jul 2014 10:12:14 -0400
Subject: [PATCH 83/84] integration of Uli Germann's memory mapped suffix array
 phrase table into EMS

---
 scripts/ems/experiment.meta        | 12 +++++++++---
 scripts/ems/experiment.perl        | 15 ++++++++++++---
 scripts/training/build-mmsapt.perl | 22 ++++++++++++++++++++++
 scripts/training/train-model.perl  | 22 ++++++++++++++--------
 4 files changed, 57 insertions(+), 14 deletions(-)
 create mode 100755 scripts/training/build-mmsapt.perl

diff --git a/scripts/ems/experiment.meta b/scripts/ems/experiment.meta
index 9785d89404..1a5d954bcc 100644
--- a/scripts/ems/experiment.meta
+++ b/scripts/ems/experiment.meta
@@ -563,7 +563,6 @@ extract-phrases
 	rerun-on-change: max-phrase-length translation-factors reordering-factors hierarchical-rule-set extract-settings training-options script use-ghkm domain-features baseline-extract lexicalized-reordering
         only-existence-matters: domain-features
 	default-name: model/extract
-	ignore-if: suffix-array
 build-reordering
 	in: extracted-phrases
 	out: reordering-table
@@ -576,7 +575,14 @@ build-ttable
 	out: phrase-translation-table
 	rerun-on-change: translation-factors hierarchical-rule-set score-settings training-options script EVALUATION:report-precision-by-coverage include-word-alignment-in-rules domain-features
 	default-name: model/phrase-table
-	ignore-if: suffix-array
+	ignore-if: suffix-array mmsapt
+	final-model: yes
+build-mmsapt
+	in: corpus-mml-postfilter=OR=word-alignment corpus-mml-postfilter=OR=corpus-mml-prefilter=OR=corpus
+	out: phrase-translation-table
+	ignore-unless: mmsapt
+	default-name: model/phrase-table-mmsapt
+	template: $moses-script-dir/training/build-mmsapt.perl --alignment IN.$alignment-symmetrization-method --corpus IN1 --f $input-extension --e $output-extension --dir OUT --settings '$mmsapt'
 	final-model: yes
 sigtest-filter-suffix-array
 	in: corpus-mml-postfilter=OR=corpus-mml-prefilter=OR=corpus
@@ -635,7 +641,7 @@ create-config
 	in: sigtest-filter-reordering-table sigtest-filter-phrase-translation-table transliteration-table generation-table sparse corpus-mml-prefilter=OR=corpus-mml-postfilter=OR=domains osm-model INTERPOLATED-LM:binlm LM:binlm 
 	out: config
 	ignore-if: use-hiero
-	rerun-on-change: decoding-steps alignment-factors translation-factors reordering-factors generation-factors lexicalized-reordering training-options script decoding-graph-backoff score-settings additional-ini
+	rerun-on-change: decoding-steps alignment-factors translation-factors reordering-factors generation-factors lexicalized-reordering training-options script decoding-graph-backoff score-settings additional-ini mmsapt
 	default-name: model/moses.ini
 	error: Unknown option
 	final-model: yes
diff --git a/scripts/ems/experiment.perl b/scripts/ems/experiment.perl
index 4f67a6d8a5..833103284b 100755
--- a/scripts/ems/experiment.perl
+++ b/scripts/ems/experiment.perl
@@ -1513,9 +1513,9 @@ sub check_if_crashed {
 			     'error','killed','core dumped','can\'t read',
 			     'no such file or directory','unknown option',
 			     'died at','exit code','permission denied',
-           'segmentation fault','abort',
-           'no space left on device',
-           'can\'t locate', 'unrecognized option', 'Exception') {
+			     'segmentation fault','abort',
+			     'no space left on device', ': not found',
+			     'can\'t locate', 'unrecognized option', 'Exception') {
 	    if (/$pattern/i) {
 		my $not_error = 0;
 		if (defined($NOT_ERROR{&defined_step_id($i)})) {
@@ -2349,6 +2349,15 @@ sub get_config_tables {
       }
     }
 
+    # memory mapped suffix array phrase table
+    my $mmsapt = &get("TRAINING:mmsapt");
+    if (defined($mmsapt)) {
+      $ptImpl = 11; # mmsapt
+      $mmsapt =~ s/num-features=(\d+) // || die("ERROR: mmsapt setting needs to set num-features");
+      $numFF = $1;
+      $cmd .= "-mmsapt '$mmsapt' ";
+    }
+
     # additional settings for factored models
     $cmd .= &get_table_name_settings("translation-factors","phrase-translation-table", $phrase_translation_table);
     $cmd = trim($cmd);
diff --git a/scripts/training/build-mmsapt.perl b/scripts/training/build-mmsapt.perl
new file mode 100755
index 0000000000..2135c12c97
--- /dev/null
+++ b/scripts/training/build-mmsapt.perl
@@ -0,0 +1,22 @@
+#!/usr/bin/perl -w
+# Build the binary files for a memory-mapped suffix array phrase table in --DIR.
+use strict;
+use Getopt::Long "GetOptions";
+
+my ($DIR,$F,$E,$ALIGNMENT,$CORPUS,$SETTINGS); # --settings is accepted but not used below
+die("ERROR: syntax is --alignment FILE --corpus FILESTEM --f EXT --e EXT --DIR OUTDIR --settings STRING")
+    unless &GetOptions('DIR=s' => \$DIR,
+		       'f=s' => \$F,
+		       'e=s' => \$E,
+		       'corpus=s' => \$CORPUS,
+		       'alignment=s' => \$ALIGNMENT,
+		       'settings=s' => \$SETTINGS)
+	   && defined($DIR) && defined($F) && defined($E) && defined($CORPUS) && defined($ALIGNMENT)
+           && -e $ALIGNMENT && -e "$CORPUS.$F" && -e "$CORPUS.$E";
+# Run a shell command and abort on a non-zero exit so a failed step is not silently ignored.
+sub run { my ($cmd) = @_; system($cmd) == 0 || die("ERROR: command failed: $cmd"); }
+run("mkdir -p $DIR"); # -p: re-running into an existing directory is not an error
+run("/opt/moses/bin/mtt-build < $CORPUS.$F -i -o $DIR/$F");
+run("/opt/moses/bin/mtt-build < $CORPUS.$E -i -o $DIR/$E");
+run("/opt/moses/bin/symal2mam < $ALIGNMENT $DIR/$F-$E.mam");
+run("/opt/moses/bin/mmlex-build $DIR/ $F $E -o $DIR/$F-$E.lex -c $DIR/$F-$E.cooc");
diff --git a/scripts/training/train-model.perl b/scripts/training/train-model.perl
index a9ed585354..22ecc5ff98 100755
--- a/scripts/training/train-model.perl
+++ b/scripts/training/train-model.perl
@@ -37,7 +37,7 @@
    $_OMIT_WORD_ALIGNMENT,$_FORCE_FACTORED_FILENAMES,
    $_MEMSCORE, $_FINAL_ALIGNMENT_MODEL,
    $_CONTINUE,$_MAX_LEXICAL_REORDERING,$_DO_STEPS,
-   @_ADDITIONAL_INI,$_ADDITIONAL_INI_FILE,
+   @_ADDITIONAL_INI,$_ADDITIONAL_INI_FILE,$_MMSAPT,
    @_BASELINE_ALIGNMENT_MODEL, $_BASELINE_EXTRACT, $_BASELINE_ALIGNMENT,
    $_DICTIONARY, $_SPARSE_PHRASE_FEATURES, $_EPPEX, $_INSTANCE_WEIGHTS_FILE, $_LMODEL_OOV_FEATURE, $_NUM_LATTICE_FEATURES, $IGNORE, $_FLEXIBILITY_SCORE, $_EXTRACT_COMMAND);
 my $_BASELINE_CORPUS = "";
@@ -121,9 +121,10 @@
 		       'no-word-alignment' => \$_OMIT_WORD_ALIGNMENT,
 		       'config=s' => \$_CONFIG,
 		       'osm-model=s' => \$_OSM,
-			'osm-setting=s' => \$_OSM_FACTORS,
-			'post-decoding-translit=s' => \$_POST_DECODING_TRANSLIT,
-			'transliteration-phrase-table=s' => \$_TRANSLITERATION_PHRASE_TABLE,		
+		       'osm-setting=s' => \$_OSM_FACTORS,
+		       'post-decoding-translit=s' => \$_POST_DECODING_TRANSLIT,
+		       'transliteration-phrase-table=s' => \$_TRANSLITERATION_PHRASE_TABLE,		
+		       'mmsapt=s' => \$_MMSAPT,
 		       'max-lexical-reordering' => \$_MAX_LEXICAL_REORDERING,
 		       'do-steps=s' => \$_DO_STEPS,
 		       'memscore:s' => \$_MEMSCORE,
@@ -1950,14 +1951,20 @@ sub create_ini {
      $phrase_table_impl_name = "PhraseDictionaryOnDisk" if $phrase_table_impl==2;
      $phrase_table_impl_name = "PhraseDictionaryMemory" if $phrase_table_impl==6;
      $phrase_table_impl_name = "PhraseDictionaryALSuffixArray" if $phrase_table_impl==10;
+     $phrase_table_impl_name = "Mmsapt" if $phrase_table_impl==11;
+     $file .= "/" if $phrase_table_impl==11 && $file !~ /\/$/;
 
-     #table limit
+     # table limit (maximum number of translation options per input phrase)
      my $table_limit = 0;
      if ($i == 0) {
        $table_limit = 20;
      }
+
      # sum up...
-     $feature_spec .= "$phrase_table_impl_name name=TranslationModel$i table-limit=$table_limit num-features=$basic_weight_count path=$file input-factor=$input_factor output-factor=$output_factor\n";
+     $feature_spec .= "$phrase_table_impl_name name=TranslationModel$i num-features=$basic_weight_count ".($phrase_table_impl==11?"base":"path")."=$file input-factor=$input_factor output-factor=$output_factor";
+     $feature_spec .= " L1=$___F L2=$___E ".$_MMSAPT if defined($_MMSAPT); # extra settings for memory mapped suffix array phrase table
+     $feature_spec .= " table-limit=$table_limit" unless defined($_MMSAPT);
+     $feature_spec .= "\n";
      $weight_spec .= "TranslationModel$i=";
      for(my $j=0;$j<$basic_weight_count;$j++) { $weight_spec .= " 0.2"; }
      $weight_spec .= "\n";
@@ -1970,8 +1977,7 @@ sub create_ini {
      exit 1 if $i < $stepsused{"T"}; # fatal to define less
    }
 
-   if ($_TRANSLITERATION_PHRASE_TABLE){
-		
+   if ($_TRANSLITERATION_PHRASE_TABLE) {
      $feature_spec .= "PhraseDictionaryMemory name=TranslationModel$i table-limit=100 num-features=4 path=$_TRANSLITERATION_PHRASE_TABLE input-factor=0 output-factor=0\n";
      $weight_spec .= "TranslationModel$i= 0.2 0.2 0.2 0.2\n";
      $i++;	

From c5147cbac4cf346e1ec8d137d036ae7911a3dab0 Mon Sep 17 00:00:00 2001
From: Hieu Hoang <hieuhoang@gmail.com>
Date: Wed, 23 Jul 2014 12:35:56 +0100
Subject: [PATCH 84/84] eclipse

---
 contrib/other-builds/extract-rules/.project | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/contrib/other-builds/extract-rules/.project b/contrib/other-builds/extract-rules/.project
index d640499a85..e4ceff8577 100644
--- a/contrib/other-builds/extract-rules/.project
+++ b/contrib/other-builds/extract-rules/.project
@@ -25,6 +25,21 @@
 		<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
 	</natures>
 	<linkedResources>
+		<link>
+			<name>Hole.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/Hole.h</locationURI>
+		</link>
+		<link>
+			<name>HoleCollection.cpp</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/HoleCollection.cpp</locationURI>
+		</link>
+		<link>
+			<name>HoleCollection.h</name>
+			<type>1</type>
+			<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/HoleCollection.h</locationURI>
+		</link>
 		<link>
 			<name>InputFileStream.cpp</name>
 			<type>1</type>
@@ -86,9 +101,9 @@
 			<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/XmlTree.h</locationURI>
 		</link>
 		<link>
-			<name>extract-main.cpp</name>
+			<name>extract-rules-main.cpp</name>
 			<type>1</type>
-			<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-main.cpp</locationURI>
+			<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-rules-main.cpp</locationURI>
 		</link>
 		<link>
 			<name>tables-core.cpp</name>