From 565b4ea1ae101db9343eb339676c42b5c51544ca Mon Sep 17 00:00:00 2001 From: Barry Haddow Date: Tue, 10 Sep 2013 08:58:45 +0100 Subject: [PATCH 01/84] Skeleton sparse reordering feature --- moses/FF/Factory.cpp | 2 + moses/FF/SparseReorderingFeature.cpp | 26 ++++++++++++ moses/FF/SparseReorderingFeature.h | 60 ++++++++++++++++++++++++++++ 3 files changed, 88 insertions(+) create mode 100644 moses/FF/SparseReorderingFeature.cpp create mode 100644 moses/FF/SparseReorderingFeature.h diff --git a/moses/FF/Factory.cpp b/moses/FF/Factory.cpp index 01b12d9207..3bf702d412 100644 --- a/moses/FF/Factory.cpp +++ b/moses/FF/Factory.cpp @@ -25,6 +25,7 @@ #include "moses/FF/PhrasePairFeature.h" #include "moses/FF/PhraseLengthFeature.h" #include "moses/FF/DistortionScoreProducer.h" +#include "moses/FF/SparseReorderingFeature.h" #include "moses/FF/WordPenaltyProducer.h" #include "moses/FF/InputFeature.h" #include "moses/FF/PhrasePenalty.h" @@ -142,6 +143,7 @@ FeatureRegistry::FeatureRegistry() MOSES_FNAME(ControlRecombination); MOSES_FNAME(SkeletonStatelessFF); MOSES_FNAME(SkeletonStatefulFF); + MOSES_FNAME(SparseReorderingFeature); #ifdef HAVE_SYNLM MOSES_FNAME(SyntacticLanguageModel); diff --git a/moses/FF/SparseReorderingFeature.cpp b/moses/FF/SparseReorderingFeature.cpp new file mode 100644 index 0000000000..3955a5fdaa --- /dev/null +++ b/moses/FF/SparseReorderingFeature.cpp @@ -0,0 +1,26 @@ +#include + +#include "SparseReorderingFeature.h" + +using namespace std; + +namespace Moses +{ + +SparseReorderingFeature::SparseReorderingFeature(const std::string &line) + :StatefulFeatureFunction("StatefulFeatureFunction", line) +{ + cerr << "Constructing a Sparse Reordering feature" << endl; +} + +FFState* SparseReorderingFeature::EvaluateChart( + const ChartHypothesis& /* cur_hypo */, + int /* featureID - used to index the state in the previous hypotheses */, + ScoreComponentCollection* accumulator) const +{ + return new SparseReorderingState(); +} + + +} + diff --git 
a/moses/FF/SparseReorderingFeature.h b/moses/FF/SparseReorderingFeature.h new file mode 100644 index 0000000000..daf137a09c --- /dev/null +++ b/moses/FF/SparseReorderingFeature.h @@ -0,0 +1,60 @@ +#pragma once + +#include + +#include "StatefulFeatureFunction.h" +#include "FFState.h" + +namespace Moses +{ + +class SparseReorderingState : public FFState +{ +public: + int Compare(const FFState& other) const + { + return 0; + } +}; + +class SparseReorderingFeature : public StatefulFeatureFunction +{ +public: + SparseReorderingFeature(const std::string &line); + + bool IsUseable(const FactorMask &mask) const + { return true; } + + void Evaluate(const Phrase &source + , const TargetPhrase &targetPhrase + , ScoreComponentCollection &scoreBreakdown + , ScoreComponentCollection &estimatedFutureScore) const + {} + void Evaluate(const InputType &input + , const InputPath &inputPath + , ScoreComponentCollection &scoreBreakdown) const + {} + FFState* Evaluate( + const Hypothesis& cur_hypo, + const FFState* prev_state, + ScoreComponentCollection* accumulator) const + { + return new SparseReorderingState(); + } + + FFState* EvaluateChart( + const ChartHypothesis& /* cur_hypo */, + int /* featureID - used to index the state in the previous hypotheses */, + ScoreComponentCollection* accumulator) const; + + virtual const FFState* EmptyHypothesisState(const InputType &input) const + { + return new SparseReorderingState(); + } + + +}; + + +} + From f7c53fef552fb1c14047dd0577ad9ade8da5acd0 Mon Sep 17 00:00:00 2001 From: Barry Haddow Date: Tue, 10 Sep 2013 11:20:14 +0100 Subject: [PATCH 02/84] Set dense feature count to 0 --- moses/FF/SparseReorderingFeature.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/moses/FF/SparseReorderingFeature.cpp b/moses/FF/SparseReorderingFeature.cpp index 3955a5fdaa..308093617b 100644 --- a/moses/FF/SparseReorderingFeature.cpp +++ b/moses/FF/SparseReorderingFeature.cpp @@ -1,5 +1,7 @@ #include +#include 
"moses/ChartHypothesis.h" + #include "SparseReorderingFeature.h" using namespace std; @@ -8,16 +10,20 @@ namespace Moses { SparseReorderingFeature::SparseReorderingFeature(const std::string &line) - :StatefulFeatureFunction("StatefulFeatureFunction", line) + :StatefulFeatureFunction("StatefulFeatureFunction",0, line) { cerr << "Constructing a Sparse Reordering feature" << endl; } FFState* SparseReorderingFeature::EvaluateChart( - const ChartHypothesis& /* cur_hypo */, - int /* featureID - used to index the state in the previous hypotheses */, + const ChartHypothesis& cur_hypo , + int featureID /*- used to index the state in the previous hypotheses */, ScoreComponentCollection* accumulator) const { + // get index map for underlying hypotheses + const AlignmentInfo::NonTermIndexMap &nonTermIndexMap = + cur_hypo.GetCurrTargetPhrase().GetAlignNonTerm().GetNonTermIndexMap(); + return new SparseReorderingState(); } From bd7f9344b8c2642b9ab88196e245cc8e6367e41d Mon Sep 17 00:00:00 2001 From: Barry Haddow Date: Wed, 11 Sep 2013 22:10:23 +0100 Subject: [PATCH 03/84] Pairs of non-terminals --- moses/FF/SparseReorderingFeature.cpp | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/moses/FF/SparseReorderingFeature.cpp b/moses/FF/SparseReorderingFeature.cpp index 308093617b..1a4becba6a 100644 --- a/moses/FF/SparseReorderingFeature.cpp +++ b/moses/FF/SparseReorderingFeature.cpp @@ -23,6 +23,26 @@ FFState* SparseReorderingFeature::EvaluateChart( // get index map for underlying hypotheses const AlignmentInfo::NonTermIndexMap &nonTermIndexMap = cur_hypo.GetCurrTargetPhrase().GetAlignNonTerm().GetNonTermIndexMap(); + + //Find all the pairs of non-terminals + //Are they forward or reversed relative to each other? 
+ //Add features for their boundary words + + //Get mapping from target to source, in target order + vector > targetNTs; //(srcIdx,targetPos) + for (size_t targetIdx = 0; targetIdx < nonTermIndexMap.size(); ++targetIdx) { + size_t srcNTIdx; + if ((srcNTIdx = nonTermIndexMap[targetIdx]) == NOT_FOUND) continue; + targetNTs.push_back(pair (srcNTIdx,targetIdx)); + } + for (size_t i = 0; i < targetNTs.size(); ++i) { + for (size_t j = i+1; j < targetNTs.size(); ++j) { + size_t src1 = targetNTs[i].first; + size_t src2 = targetNTs[j].first; + //NT pair (src1,src2) maps to (i,j) + cerr << src1 << " -> " << i << " , " << src2 << " -> " << j << endl; + } + } return new SparseReorderingState(); } From 7047b4e197de8fb70c56dcbf00fe19cdf67aea0b Mon Sep 17 00:00:00 2001 From: Barry Haddow Date: Thu, 12 Sep 2013 13:39:59 +0100 Subject: [PATCH 04/84] Sparse reordering for non-terminal pairs --- moses/FF/SparseReorderingFeature.cpp | 42 +++++++++++++++++++++++++++- moses/FF/SparseReorderingFeature.h | 5 ++++ 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/moses/FF/SparseReorderingFeature.cpp b/moses/FF/SparseReorderingFeature.cpp index 1a4becba6a..16e175fff5 100644 --- a/moses/FF/SparseReorderingFeature.cpp +++ b/moses/FF/SparseReorderingFeature.cpp @@ -1,6 +1,8 @@ #include #include "moses/ChartHypothesis.h" +#include "moses/ChartManager.h" +#include "moses/Sentence.h" #include "SparseReorderingFeature.h" @@ -15,6 +17,34 @@ SparseReorderingFeature::SparseReorderingFeature(const std::string &line) cerr << "Constructing a Sparse Reordering feature" << endl; } +static void AddFeatureWordPair(const string& prefix, const string& suffix, + const Word& word1, const Word& word2, ScoreComponentCollection* accumulator, FactorType factor = 0) { + stringstream buf; + buf << prefix << word1[factor]->GetString() << "_" << word2[factor]->GetString() << suffix; + accumulator->SparsePlusEquals(buf.str(), 1); +} + + +void SparseReorderingFeature::AddNonTerminalPairFeatures( + const 
Sentence& sentence, const WordsRange& nt1, const WordsRange& nt2, + bool isMonotone, ScoreComponentCollection* accumulator) const { + //TODO: remove string concatenation + const static string monotone = "_M"; + const static string swap = "_S"; + const static string prefixes[] = + { "srf_slslw_", "srf_slsrw_", "srf_srslw_", "srf_srsrw_"}; + + string direction = isMonotone ? monotone : swap; + AddFeatureWordPair(prefixes[0], direction, + sentence.GetWord(nt1.GetStartPos()), sentence.GetWord(nt2.GetStartPos()), accumulator); + AddFeatureWordPair(prefixes[1], direction, + sentence.GetWord(nt1.GetStartPos()), sentence.GetWord(nt2.GetEndPos()), accumulator); + AddFeatureWordPair(prefixes[2], direction, + sentence.GetWord(nt1.GetEndPos()), sentence.GetWord(nt2.GetStartPos()), accumulator); + AddFeatureWordPair(prefixes[3], direction, + sentence.GetWord(nt1.GetEndPos()), sentence.GetWord(nt2.GetStartPos()), accumulator); +} + FFState* SparseReorderingFeature::EvaluateChart( const ChartHypothesis& cur_hypo , int featureID /*- used to index the state in the previous hypotheses */, @@ -35,12 +65,22 @@ FFState* SparseReorderingFeature::EvaluateChart( if ((srcNTIdx = nonTermIndexMap[targetIdx]) == NOT_FOUND) continue; targetNTs.push_back(pair (srcNTIdx,targetIdx)); } + //Add features for pairs of non-terminals for (size_t i = 0; i < targetNTs.size(); ++i) { for (size_t j = i+1; j < targetNTs.size(); ++j) { size_t src1 = targetNTs[i].first; size_t src2 = targetNTs[j].first; //NT pair (src1,src2) maps to (i,j) - cerr << src1 << " -> " << i << " , " << src2 << " -> " << j << endl; + bool isMonotone = true; + if ((src1 < src2 && i > j) || (src1 > src2 && i < j)) isMonotone = false; + //NB: should throw bad_cast for Lattice input + const Sentence& sentence = + dynamic_cast(cur_hypo.GetManager().GetSource()); + AddNonTerminalPairFeatures(sentence, + cur_hypo.GetPrevHypo(src1)->GetCurrSourceRange(), + cur_hypo.GetPrevHypo(src2)->GetCurrSourceRange(), + isMonotone, + accumulator); } } 
diff --git a/moses/FF/SparseReorderingFeature.h b/moses/FF/SparseReorderingFeature.h index daf137a09c..73f1670cf1 100644 --- a/moses/FF/SparseReorderingFeature.h +++ b/moses/FF/SparseReorderingFeature.h @@ -52,6 +52,11 @@ class SparseReorderingFeature : public StatefulFeatureFunction return new SparseReorderingState(); } +private: + +void AddNonTerminalPairFeatures( + const Sentence& sentence, const WordsRange& nt1, const WordsRange& nt2, + bool isMonotone, ScoreComponentCollection* accumulator) const; }; From 3dff33069499d817af145ab9e6585022ef912df3 Mon Sep 17 00:00:00 2001 From: Barry Haddow Date: Thu, 12 Sep 2013 18:55:10 +0100 Subject: [PATCH 05/84] Configuration --- moses/FF/SparseReorderingFeature.cpp | 54 +++++++++++++++++++++++++--- moses/FF/SparseReorderingFeature.h | 20 +++++++++-- 2 files changed, 67 insertions(+), 7 deletions(-) diff --git a/moses/FF/SparseReorderingFeature.cpp b/moses/FF/SparseReorderingFeature.cpp index 16e175fff5..6127fde414 100644 --- a/moses/FF/SparseReorderingFeature.cpp +++ b/moses/FF/SparseReorderingFeature.cpp @@ -4,6 +4,8 @@ #include "moses/ChartManager.h" #include "moses/Sentence.h" +#include "util/exception.hh" + #include "SparseReorderingFeature.h" using namespace std; @@ -12,9 +14,47 @@ namespace Moses { SparseReorderingFeature::SparseReorderingFeature(const std::string &line) - :StatefulFeatureFunction("StatefulFeatureFunction",0, line) + :StatefulFeatureFunction("StatefulFeatureFunction",0, line), + m_sourceFactor(0), + m_targetFactor(0), + m_sourceVocabFile(""), + m_targetVocabFile("") { + + /* + Configuration of features. + factor - Which factor should it apply to + type - what type of sparse reordering feature. e.g. block (modelled on Matthias + Huck's EAMT 2012 features) + word - which words to include, e.g. src_bdry, src_all, tgt_bdry , ... + vocab - vocab file to limit it to + orientation - e.g. lr, etc. 
+ */ cerr << "Constructing a Sparse Reordering feature" << endl; + ReadParameters(); + LoadVocabulary(m_sourceVocabFile, m_sourceVocab); + LoadVocabulary(m_targetVocabFile, m_targetVocab); +} + +void SparseReorderingFeature::SetParameter(const std::string& key, const std::string& value) { + if (key == "input-factor") { + m_sourceFactor = Scan(value); + } else if (key == "output-factor") { + m_targetFactor = Scan(value); + } else if (key == "input-vocab-file") { + m_sourceVocabFile = value; + } else if (key == "output-vocab-file") { + m_targetVocabFile = value; + } else { + FeatureFunction::SetParameter(key, value); + } +} + +void SparseReorderingFeature::LoadVocabulary(const std::string& filename, boost::unordered_set& vocab) +{ + if (filename.empty()) return; + ifstream in(filename.c_str()); + UTIL_THROW_IF(!in, util::Exception, "Unable to open vocab file: " << filename); } static void AddFeatureWordPair(const string& prefix, const string& suffix, @@ -54,9 +94,15 @@ FFState* SparseReorderingFeature::EvaluateChart( const AlignmentInfo::NonTermIndexMap &nonTermIndexMap = cur_hypo.GetCurrTargetPhrase().GetAlignNonTerm().GetNonTermIndexMap(); - //Find all the pairs of non-terminals - //Are they forward or reversed relative to each other? - //Add features for their boundary words + //The Huck features. For a rule with source side: + // abXcdXef + //We first have to split into blocks: + // ab X cd X ef + //Then we extract features based in the boundary words of the neighbouring blocks + //For the block pair, we use the right word of the left block, and the left + //word of the right block. 
+ + WordsRange sourceRange = cur_hypo.GetCurrSourceRange(); //Get mapping from target to source, in target order vector > targetNTs; //(srcIdx,targetPos) diff --git a/moses/FF/SparseReorderingFeature.h b/moses/FF/SparseReorderingFeature.h index 73f1670cf1..408b00b342 100644 --- a/moses/FF/SparseReorderingFeature.h +++ b/moses/FF/SparseReorderingFeature.h @@ -2,6 +2,8 @@ #include +#include + #include "StatefulFeatureFunction.h" #include "FFState.h" @@ -25,6 +27,8 @@ class SparseReorderingFeature : public StatefulFeatureFunction bool IsUseable(const FactorMask &mask) const { return true; } + void SetParameter(const std::string& key, const std::string& value); + void Evaluate(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown @@ -54,9 +58,19 @@ class SparseReorderingFeature : public StatefulFeatureFunction private: -void AddNonTerminalPairFeatures( - const Sentence& sentence, const WordsRange& nt1, const WordsRange& nt2, - bool isMonotone, ScoreComponentCollection* accumulator) const; + void AddNonTerminalPairFeatures( + const Sentence& sentence, const WordsRange& nt1, const WordsRange& nt2, + bool isMonotone, ScoreComponentCollection* accumulator) const; + + void LoadVocabulary(const std::string& filename, boost::unordered_set& vocab); + + FactorType m_sourceFactor; + FactorType m_targetFactor; + std::string m_sourceVocabFile; + std::string m_targetVocabFile; + + boost::unordered_set m_sourceVocab; + boost::unordered_set m_targetVocab; }; From 8651f8da894c45c83d946745c599c04067811be2 Mon Sep 17 00:00:00 2001 From: Barry Haddow Date: Fri, 13 Sep 2013 08:48:44 +0100 Subject: [PATCH 06/84] Extract blocks --- moses/FF/SparseReorderingFeature.cpp | 45 +++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/moses/FF/SparseReorderingFeature.cpp b/moses/FF/SparseReorderingFeature.cpp index 6127fde414..3e955bce26 100644 --- a/moses/FF/SparseReorderingFeature.cpp +++ 
b/moses/FF/SparseReorderingFeature.cpp @@ -102,7 +102,50 @@ FFState* SparseReorderingFeature::EvaluateChart( //For the block pair, we use the right word of the left block, and the left //word of the right block. - WordsRange sourceRange = cur_hypo.GetCurrSourceRange(); + //Need to get blocks, and their alignment. Each block has a word range (on the + // on the source), a non-terminal flag, and a set of alignment points in the target phrase + + vector sourceNTSpans; + for (size_t prevHypoId = 0; prevHypoId < cur_hypo.GetPrevHypos().size(); ++prevHypoId) { + sourceNTSpans.push_back(cur_hypo.GetPrevHypo(prevHypoId)->GetCurrSourceRange()); + } + sort(sourceNTSpans.begin(), sourceNTSpans.end()); //put in source order + cerr << "Source NTs: "; + for (size_t i = 0; i < sourceNTSpans.size(); ++i) cerr << sourceNTSpans[i] << " "; + cerr << endl; + + vector blocks; + blocks.push_back(cur_hypo.GetCurrSourceRange()); + for (vector::const_iterator i = sourceNTSpans.begin(); + i != sourceNTSpans.end(); ++i) { + const WordsRange& prevHypoRange = *i; + WordsRange lastRange = blocks.back(); + blocks.pop_back(); + //split this range into before NT, NT and after NT + if (prevHypoRange.GetStartPos() > lastRange.GetStartPos()) { + blocks.push_back(WordsRange(lastRange.GetStartPos(),prevHypoRange.GetStartPos()-1)); + } + blocks.push_back(prevHypoRange); + if (prevHypoRange.GetEndPos() < lastRange.GetEndPos()) { + blocks.push_back(WordsRange(prevHypoRange.GetEndPos()+1,lastRange.GetEndPos())); + } + } + cerr << "Blocks: "; + for (size_t i = 0; i < blocks.size(); ++i) cerr << blocks[i] << " "; + cerr << endl; + + //this currently doesn't work + const InputPath* inputPath = cur_hypo.GetTranslationOption().GetInputPath(); + //The phrase is always dangling + //cerr << "IP: phrase " << inputPath << endl; + /* + cerr << "NTs "; + for (NonTerminalSet::const_iterator i = inputPath->GetNonTerminalSet().begin(); + i != inputPath->GetNonTerminalSet().end(); ++i) { + cerr << *i << " "; + } + cerr << 
endl; + */ //Get mapping from target to source, in target order vector > targetNTs; //(srcIdx,targetPos) From 9d874b0ac13da8e9cd2f62942df9dea7d7a8b3df Mon Sep 17 00:00:00 2001 From: Barry Haddow Date: Fri, 13 Sep 2013 14:44:30 +0100 Subject: [PATCH 07/84] Prints out feature values --- moses/FF/SparseReorderingFeature.cpp | 134 ++++++++++++++++++++++----- 1 file changed, 112 insertions(+), 22 deletions(-) diff --git a/moses/FF/SparseReorderingFeature.cpp b/moses/FF/SparseReorderingFeature.cpp index 3e955bce26..1fcd5e3b0c 100644 --- a/moses/FF/SparseReorderingFeature.cpp +++ b/moses/FF/SparseReorderingFeature.cpp @@ -91,8 +91,8 @@ FFState* SparseReorderingFeature::EvaluateChart( ScoreComponentCollection* accumulator) const { // get index map for underlying hypotheses - const AlignmentInfo::NonTermIndexMap &nonTermIndexMap = - cur_hypo.GetCurrTargetPhrase().GetAlignNonTerm().GetNonTermIndexMap(); + //const AlignmentInfo::NonTermIndexMap &nonTermIndexMap = + // cur_hypo.GetCurrTargetPhrase().GetAlignNonTerm().GetNonTermIndexMap(); //The Huck features. For a rule with source side: // abXcdXef @@ -105,48 +105,138 @@ FFState* SparseReorderingFeature::EvaluateChart( //Need to get blocks, and their alignment. Each block has a word range (on the // on the source), a non-terminal flag, and a set of alignment points in the target phrase + //We need to be able to map source word position to target word position, as + //much as possible (don't need interior of non-terminals). The alignment info + //objects just give us the mappings between *rule* positions. So if we can + //map source word position to source rule position, and target rule position + //to target word position, then we can map right through. 
+ + size_t sourceStart = cur_hypo.GetCurrSourceRange().GetStartPos(); + size_t sourceSize = cur_hypo.GetCurrSourceRange().GetNumWordsCovered(); + vector sourceNTSpans; for (size_t prevHypoId = 0; prevHypoId < cur_hypo.GetPrevHypos().size(); ++prevHypoId) { sourceNTSpans.push_back(cur_hypo.GetPrevHypo(prevHypoId)->GetCurrSourceRange()); } - sort(sourceNTSpans.begin(), sourceNTSpans.end()); //put in source order - cerr << "Source NTs: "; - for (size_t i = 0; i < sourceNTSpans.size(); ++i) cerr << sourceNTSpans[i] << " "; - cerr << endl; + //put in source order. Is this necessary? + sort(sourceNTSpans.begin(), sourceNTSpans.end()); + //cerr << "Source NTs: "; + //for (size_t i = 0; i < sourceNTSpans.size(); ++i) cerr << sourceNTSpans[i] << " "; + //cerr << endl; - vector blocks; - blocks.push_back(cur_hypo.GetCurrSourceRange()); + typedef pair Block;//flag indicates NT + vector sourceBlocks; + sourceBlocks.push_back(Block(cur_hypo.GetCurrSourceRange(),false)); for (vector::const_iterator i = sourceNTSpans.begin(); i != sourceNTSpans.end(); ++i) { const WordsRange& prevHypoRange = *i; - WordsRange lastRange = blocks.back(); - blocks.pop_back(); + Block lastBlock = sourceBlocks.back(); + sourceBlocks.pop_back(); //split this range into before NT, NT and after NT - if (prevHypoRange.GetStartPos() > lastRange.GetStartPos()) { - blocks.push_back(WordsRange(lastRange.GetStartPos(),prevHypoRange.GetStartPos()-1)); + if (prevHypoRange.GetStartPos() > lastBlock.first.GetStartPos()) { + sourceBlocks.push_back(Block(WordsRange(lastBlock.first.GetStartPos(),prevHypoRange.GetStartPos()-1),false)); } - blocks.push_back(prevHypoRange); - if (prevHypoRange.GetEndPos() < lastRange.GetEndPos()) { - blocks.push_back(WordsRange(prevHypoRange.GetEndPos()+1,lastRange.GetEndPos())); + sourceBlocks.push_back(Block(prevHypoRange,true)); + if (prevHypoRange.GetEndPos() < lastBlock.first.GetEndPos()) { + 
sourceBlocks.push_back(Block(WordsRange(prevHypoRange.GetEndPos()+1,lastBlock.first.GetEndPos()), false)); } } - cerr << "Blocks: "; - for (size_t i = 0; i < blocks.size(); ++i) cerr << blocks[i] << " "; + cerr << "Source Blocks: "; + for (size_t i = 0; i < sourceBlocks.size(); ++i) cerr << sourceBlocks[i].first << " " + << (sourceBlocks[i].second ? "NT" : "T") << " "; cerr << endl; - //this currently doesn't work + //Mapping from source word to target rule position + vector sourceWordToTargetRulePos(sourceSize); + map alignMap; + alignMap.insert( + cur_hypo.GetCurrTargetPhrase().GetAlignTerm().begin(), + cur_hypo.GetCurrTargetPhrase().GetAlignTerm().end()); + alignMap.insert( + cur_hypo.GetCurrTargetPhrase().GetAlignNonTerm().begin(), + cur_hypo.GetCurrTargetPhrase().GetAlignNonTerm().end()); + //vector alignMapTerm = cur_hypo.GetCurrTargetPhrase().GetAlignNonTerm() + size_t sourceRulePos = 0; + //cerr << "SW->RP "; + for (vector::const_iterator sourceBlockIt = sourceBlocks.begin(); + sourceBlockIt != sourceBlocks.end(); ++sourceBlockIt) { + for (size_t sourceWordPos = sourceBlockIt->first.GetStartPos(); + sourceWordPos <= sourceBlockIt->first.GetEndPos(); ++sourceWordPos) { + sourceWordToTargetRulePos[sourceWordPos - sourceStart] = alignMap[sourceRulePos]; + // cerr << sourceWordPos - sourceStart << "-" << alignMap[sourceRulePos] << " "; + if (! 
sourceBlockIt->second) { + //T + ++sourceRulePos; + } + } + if ( sourceBlockIt->second) { + //NT + ++sourceRulePos; + } + } + //cerr << endl; + + /** const InputPath* inputPath = cur_hypo.GetTranslationOption().GetInputPath(); - //The phrase is always dangling - //cerr << "IP: phrase " << inputPath << endl; - /* + cerr << "IP phrase: " << inputPath->GetPhrase() << endl; cerr << "NTs "; for (NonTerminalSet::const_iterator i = inputPath->GetNonTerminalSet().begin(); i != inputPath->GetNonTerminalSet().end(); ++i) { cerr << *i << " "; } cerr << endl; + **/ + //Iterate through block pairs + const Sentence& sentence = + dynamic_cast(cur_hypo.GetManager().GetSource()); + //const TargetPhrase& targetPhrase = cur_hypo.GetCurrTargetPhrase(); + for (size_t i = 0; i < sourceBlocks.size()-1; ++i) { + Block& leftSourceBlock = sourceBlocks[i]; + Block& rightSourceBlock = sourceBlocks[i+1]; + size_t sourceLeftBoundaryPos = leftSourceBlock.first.GetEndPos(); + size_t sourceRightBoundaryPos = rightSourceBlock.first.GetStartPos(); + const Word& sourceLeftBoundaryWord = sentence.GetWord(sourceLeftBoundaryPos); + const Word& sourceRightBoundaryWord = sentence.GetWord(sourceRightBoundaryPos); + sourceLeftBoundaryPos -= sourceStart; + sourceRightBoundaryPos -= sourceStart; + + // Need to figure out where these map to on the target. + size_t targetLeftRulePos = + sourceWordToTargetRulePos[sourceLeftBoundaryPos]; + size_t targetRightRulePos = + sourceWordToTargetRulePos[sourceRightBoundaryPos]; + + bool isMonotone = true; + if ((sourceLeftBoundaryPos < sourceRightBoundaryPos && + targetLeftRulePos > targetRightRulePos) || + ((sourceLeftBoundaryPos > sourceRightBoundaryPos && + targetLeftRulePos < targetRightRulePos))) + { + isMonotone = false; + } + cerr << sourceLeftBoundaryWord.GetFactor(0)->GetString() << + "_" << sourceRightBoundaryWord.GetFactor(0)->GetString() << "_" + << (isMonotone ? 
"M" : "S") << endl; + } + cerr << endl; + + /* + cerr << "NT align "; + const AlignmentInfo& align = cur_hypo.GetCurrTargetPhrase().GetAlignNonTerm(); + for (AlignmentInfo::CollType::const_iterator i = align.begin(); i != align.end(); ++i) { + cerr << i->first << "," << i->second << " "; + } + cerr << endl; + + cerr << "T align "; + const AlignmentInfo& alignT = cur_hypo.GetCurrTargetPhrase().GetAlignTerm(); + for (AlignmentInfo::CollType::const_iterator i = alignT.begin(); i != alignT.end(); ++i) { + cerr << i->first << "," << i->second << " "; + } + cerr << endl; */ + /* //Get mapping from target to source, in target order vector > targetNTs; //(srcIdx,targetPos) for (size_t targetIdx = 0; targetIdx < nonTermIndexMap.size(); ++targetIdx) { @@ -171,7 +261,7 @@ FFState* SparseReorderingFeature::EvaluateChart( isMonotone, accumulator); } - } + }*/ return new SparseReorderingState(); } From 82369968b5ea9b18086b04e9935da72fd484a4d3 Mon Sep 17 00:00:00 2001 From: Barry Haddow Date: Fri, 13 Sep 2013 16:52:42 +0100 Subject: [PATCH 08/84] vocabulary, type configuration --- moses/FF/SparseReorderingFeature.cpp | 123 +++++++++------------------ moses/FF/SparseReorderingFeature.h | 20 ++++- 2 files changed, 55 insertions(+), 88 deletions(-) diff --git a/moses/FF/SparseReorderingFeature.cpp b/moses/FF/SparseReorderingFeature.cpp index 1fcd5e3b0c..63e7dddc90 100644 --- a/moses/FF/SparseReorderingFeature.cpp +++ b/moses/FF/SparseReorderingFeature.cpp @@ -2,6 +2,7 @@ #include "moses/ChartHypothesis.h" #include "moses/ChartManager.h" +#include "moses/FactorCollection.h" #include "moses/Sentence.h" #include "util/exception.hh" @@ -15,6 +16,7 @@ namespace Moses SparseReorderingFeature::SparseReorderingFeature(const std::string &line) :StatefulFeatureFunction("StatefulFeatureFunction",0, line), + m_type(SourceCombined), m_sourceFactor(0), m_targetFactor(0), m_sourceVocabFile(""), @@ -32,6 +34,7 @@ SparseReorderingFeature::SparseReorderingFeature(const std::string &line) */ cerr << 
"Constructing a Sparse Reordering feature" << endl; ReadParameters(); + m_otherFactor = FactorCollection::Instance().AddFactor("##OTHER##"); LoadVocabulary(m_sourceVocabFile, m_sourceVocab); LoadVocabulary(m_targetVocabFile, m_targetVocab); } @@ -45,44 +48,37 @@ void SparseReorderingFeature::SetParameter(const std::string& key, const std::st m_sourceVocabFile = value; } else if (key == "output-vocab-file") { m_targetVocabFile = value; + } else if (key == "type") { + if (value == "SourceCombined") { + m_type = SourceCombined; + } else if (value == "SourceLeft") { + m_type = SourceLeft; + } else if (value == "SourceRight") { + m_type = SourceRight; + } else { + UTIL_THROW(util::Exception, "Unknown sparse reordering type " << value); + } } else { FeatureFunction::SetParameter(key, value); } } -void SparseReorderingFeature::LoadVocabulary(const std::string& filename, boost::unordered_set& vocab) +void SparseReorderingFeature::LoadVocabulary(const std::string& filename, Vocab& vocab) { if (filename.empty()) return; ifstream in(filename.c_str()); UTIL_THROW_IF(!in, util::Exception, "Unable to open vocab file: " << filename); + string line; + while(getline(in,line)) { + vocab.insert(FactorCollection::Instance().AddFactor(line)); + } + in.close(); } -static void AddFeatureWordPair(const string& prefix, const string& suffix, - const Word& word1, const Word& word2, ScoreComponentCollection* accumulator, FactorType factor = 0) { - stringstream buf; - buf << prefix << word1[factor]->GetString() << "_" << word2[factor]->GetString() << suffix; - accumulator->SparsePlusEquals(buf.str(), 1); -} - - -void SparseReorderingFeature::AddNonTerminalPairFeatures( - const Sentence& sentence, const WordsRange& nt1, const WordsRange& nt2, - bool isMonotone, ScoreComponentCollection* accumulator) const { - //TODO: remove string concatenation - const static string monotone = "_M"; - const static string swap = "_S"; - const static string prefixes[] = - { "srf_slslw_", "srf_slsrw_", 
"srf_srslw_", "srf_srsrw_"}; - - string direction = isMonotone ? monotone : swap; - AddFeatureWordPair(prefixes[0], direction, - sentence.GetWord(nt1.GetStartPos()), sentence.GetWord(nt2.GetStartPos()), accumulator); - AddFeatureWordPair(prefixes[1], direction, - sentence.GetWord(nt1.GetStartPos()), sentence.GetWord(nt2.GetEndPos()), accumulator); - AddFeatureWordPair(prefixes[2], direction, - sentence.GetWord(nt1.GetEndPos()), sentence.GetWord(nt2.GetStartPos()), accumulator); - AddFeatureWordPair(prefixes[3], direction, - sentence.GetWord(nt1.GetEndPos()), sentence.GetWord(nt2.GetStartPos()), accumulator); +const Factor* SparseReorderingFeature::GetFactor(const Word& word, const Vocab& vocab, FactorType factorType) const { + const Factor* factor = word.GetFactor(factorType); + if (vocab.size() && vocab.find(factor) == vocab.end()) return m_otherFactor; + return factor; } FFState* SparseReorderingFeature::EvaluateChart( @@ -141,10 +137,12 @@ FFState* SparseReorderingFeature::EvaluateChart( sourceBlocks.push_back(Block(WordsRange(prevHypoRange.GetEndPos()+1,lastBlock.first.GetEndPos()), false)); } } + /* cerr << "Source Blocks: "; for (size_t i = 0; i < sourceBlocks.size(); ++i) cerr << sourceBlocks[i].first << " " << (sourceBlocks[i].second ? 
"NT" : "T") << " "; cerr << endl; + */ //Mapping from source word to target rule position vector sourceWordToTargetRulePos(sourceSize); @@ -176,16 +174,6 @@ FFState* SparseReorderingFeature::EvaluateChart( } //cerr << endl; - /** - const InputPath* inputPath = cur_hypo.GetTranslationOption().GetInputPath(); - cerr << "IP phrase: " << inputPath->GetPhrase() << endl; - cerr << "NTs "; - for (NonTerminalSet::const_iterator i = inputPath->GetNonTerminalSet().begin(); - i != inputPath->GetNonTerminalSet().end(); ++i) { - cerr << *i << " "; - } - cerr << endl; - **/ //Iterate through block pairs const Sentence& sentence = dynamic_cast(cur_hypo.GetManager().GetSource()); @@ -214,55 +202,20 @@ FFState* SparseReorderingFeature::EvaluateChart( { isMonotone = false; } - cerr << sourceLeftBoundaryWord.GetFactor(0)->GetString() << - "_" << sourceRightBoundaryWord.GetFactor(0)->GetString() << "_" - << (isMonotone ? "M" : "S") << endl; - } - cerr << endl; - - /* - cerr << "NT align "; - const AlignmentInfo& align = cur_hypo.GetCurrTargetPhrase().GetAlignNonTerm(); - for (AlignmentInfo::CollType::const_iterator i = align.begin(); i != align.end(); ++i) { - cerr << i->first << "," << i->second << " "; - } - cerr << endl; - - cerr << "T align "; - const AlignmentInfo& alignT = cur_hypo.GetCurrTargetPhrase().GetAlignTerm(); - for (AlignmentInfo::CollType::const_iterator i = alignT.begin(); i != alignT.end(); ++i) { - cerr << i->first << "," << i->second << " "; - } - cerr << endl; - */ - - /* - //Get mapping from target to source, in target order - vector > targetNTs; //(srcIdx,targetPos) - for (size_t targetIdx = 0; targetIdx < nonTermIndexMap.size(); ++targetIdx) { - size_t srcNTIdx; - if ((srcNTIdx = nonTermIndexMap[targetIdx]) == NOT_FOUND) continue; - targetNTs.push_back(pair (srcNTIdx,targetIdx)); - } - //Add features for pairs of non-terminals - for (size_t i = 0; i < targetNTs.size(); ++i) { - for (size_t j = i+1; j < targetNTs.size(); ++j) { - size_t src1 = 
targetNTs[i].first; - size_t src2 = targetNTs[j].first; - //NT pair (src1,src2) maps to (i,j) - bool isMonotone = true; - if ((src1 < src2 && i > j) || (src1 > src2 && i < j)) isMonotone = false; - //NB: should throw bad_cast for Lattice input - const Sentence& sentence = - dynamic_cast(cur_hypo.GetManager().GetSource()); - AddNonTerminalPairFeatures(sentence, - cur_hypo.GetPrevHypo(src1)->GetCurrSourceRange(), - cur_hypo.GetPrevHypo(src2)->GetCurrSourceRange(), - isMonotone, - accumulator); + stringstream buf; + buf << "sr_h_"; //sparse reordering, Huck + if (m_type == SourceLeft || m_type == SourceCombined) { + buf << GetFactor(sourceLeftBoundaryWord,m_sourceVocab,m_sourceFactor)->GetString(); + buf << "_"; } - }*/ - + if (m_type == SourceRight || m_type == SourceCombined) { + buf << GetFactor(sourceRightBoundaryWord,m_sourceVocab,m_sourceFactor)->GetString(); + buf << "_"; + } + buf << (isMonotone ? "M" : "S"); + accumulator->SparsePlusEquals(buf.str(), 1); + } +// cerr << endl; return new SparseReorderingState(); } diff --git a/moses/FF/SparseReorderingFeature.h b/moses/FF/SparseReorderingFeature.h index 408b00b342..021d276456 100644 --- a/moses/FF/SparseReorderingFeature.h +++ b/moses/FF/SparseReorderingFeature.h @@ -4,6 +4,8 @@ #include +#include + #include "StatefulFeatureFunction.h" #include "FFState.h" @@ -22,6 +24,12 @@ class SparseReorderingState : public FFState class SparseReorderingFeature : public StatefulFeatureFunction { public: + enum Type { + SourceCombined, + SourceLeft, + SourceRight + }; + SparseReorderingFeature(const std::string &line); bool IsUseable(const FactorMask &mask) const @@ -58,19 +66,25 @@ class SparseReorderingFeature : public StatefulFeatureFunction private: + typedef boost::unordered_set Vocab; + void AddNonTerminalPairFeatures( const Sentence& sentence, const WordsRange& nt1, const WordsRange& nt2, bool isMonotone, ScoreComponentCollection* accumulator) const; - void LoadVocabulary(const std::string& filename, 
boost::unordered_set& vocab); + void LoadVocabulary(const std::string& filename, Vocab& vocab); + const Factor* GetFactor(const Word& word, const Vocab& vocab, FactorType factor) const; + Type m_type; FactorType m_sourceFactor; FactorType m_targetFactor; std::string m_sourceVocabFile; std::string m_targetVocabFile; - boost::unordered_set m_sourceVocab; - boost::unordered_set m_targetVocab; + const Factor* m_otherFactor; + + Vocab m_sourceVocab; + Vocab m_targetVocab; }; From 0496363db34ebd80313a226006b6083f501e67df Mon Sep 17 00:00:00 2001 From: Barry Haddow Date: Fri, 13 Sep 2013 17:38:14 +0100 Subject: [PATCH 09/84] Convert to stateless --- moses/FF/SparseReorderingFeature.cpp | 6 ++--- moses/FF/SparseReorderingFeature.h | 37 +++++++--------------------- 2 files changed, 11 insertions(+), 32 deletions(-) diff --git a/moses/FF/SparseReorderingFeature.cpp b/moses/FF/SparseReorderingFeature.cpp index 63e7dddc90..8703a2765b 100644 --- a/moses/FF/SparseReorderingFeature.cpp +++ b/moses/FF/SparseReorderingFeature.cpp @@ -15,7 +15,7 @@ namespace Moses { SparseReorderingFeature::SparseReorderingFeature(const std::string &line) - :StatefulFeatureFunction("StatefulFeatureFunction",0, line), + :StatelessFeatureFunction("StatefulFeatureFunction",0, line), m_type(SourceCombined), m_sourceFactor(0), m_targetFactor(0), @@ -81,9 +81,8 @@ const Factor* SparseReorderingFeature::GetFactor(const Word& word, const Vocab& return factor; } -FFState* SparseReorderingFeature::EvaluateChart( +void SparseReorderingFeature::EvaluateChart( const ChartHypothesis& cur_hypo , - int featureID /*- used to index the state in the previous hypotheses */, ScoreComponentCollection* accumulator) const { // get index map for underlying hypotheses @@ -216,7 +215,6 @@ FFState* SparseReorderingFeature::EvaluateChart( accumulator->SparsePlusEquals(buf.str(), 1); } // cerr << endl; - return new SparseReorderingState(); } diff --git a/moses/FF/SparseReorderingFeature.h b/moses/FF/SparseReorderingFeature.h 
index 021d276456..200200806d 100644 --- a/moses/FF/SparseReorderingFeature.h +++ b/moses/FF/SparseReorderingFeature.h @@ -6,22 +6,13 @@ #include -#include "StatefulFeatureFunction.h" +#include "StatelessFeatureFunction.h" #include "FFState.h" namespace Moses { -class SparseReorderingState : public FFState -{ -public: - int Compare(const FFState& other) const - { - return 0; - } -}; - -class SparseReorderingFeature : public StatefulFeatureFunction +class SparseReorderingFeature : public StatelessFeatureFunction { public: enum Type { @@ -46,23 +37,13 @@ class SparseReorderingFeature : public StatefulFeatureFunction , const InputPath &inputPath , ScoreComponentCollection &scoreBreakdown) const {} - FFState* Evaluate( - const Hypothesis& cur_hypo, - const FFState* prev_state, - ScoreComponentCollection* accumulator) const - { - return new SparseReorderingState(); - } - - FFState* EvaluateChart( - const ChartHypothesis& /* cur_hypo */, - int /* featureID - used to index the state in the previous hypotheses */, - ScoreComponentCollection* accumulator) const; - - virtual const FFState* EmptyHypothesisState(const InputType &input) const - { - return new SparseReorderingState(); - } + + virtual void Evaluate(const Hypothesis& hypo, + ScoreComponentCollection* accumulator) const + {} + void EvaluateChart(const ChartHypothesis &hypo, + ScoreComponentCollection* accumulator) const; + private: From f816a138efd2a00d451609ba1afca8c33be417c9 Mon Sep 17 00:00:00 2001 From: Barry Haddow Date: Fri, 13 Sep 2013 18:21:52 +0100 Subject: [PATCH 10/84] feature name --- moses/FF/SparseReorderingFeature.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/moses/FF/SparseReorderingFeature.cpp b/moses/FF/SparseReorderingFeature.cpp index 8703a2765b..0203406b07 100644 --- a/moses/FF/SparseReorderingFeature.cpp +++ b/moses/FF/SparseReorderingFeature.cpp @@ -15,7 +15,7 @@ namespace Moses { SparseReorderingFeature::SparseReorderingFeature(const std::string &line) - 
:StatelessFeatureFunction("StatefulFeatureFunction",0, line), + :StatelessFeatureFunction("SparseReorderingFeature",0, line), m_type(SourceCombined), m_sourceFactor(0), m_targetFactor(0), @@ -202,7 +202,7 @@ void SparseReorderingFeature::EvaluateChart( isMonotone = false; } stringstream buf; - buf << "sr_h_"; //sparse reordering, Huck + buf << "h_"; //sparse reordering, Huck if (m_type == SourceLeft || m_type == SourceCombined) { buf << GetFactor(sourceLeftBoundaryWord,m_sourceVocab,m_sourceFactor)->GetString(); buf << "_"; @@ -212,7 +212,7 @@ void SparseReorderingFeature::EvaluateChart( buf << "_"; } buf << (isMonotone ? "M" : "S"); - accumulator->SparsePlusEquals(buf.str(), 1); + accumulator->PlusEquals(this,buf.str(), 1); } // cerr << endl; } From d737a352b373363c1bacc72bc5f20221fea90712 Mon Sep 17 00:00:00 2001 From: Barry Haddow Date: Sat, 14 Sep 2013 17:22:43 +0100 Subject: [PATCH 11/84] Renamed --- moses/FF/Factory.cpp | 4 ++-- ...eature.cpp => SparseHieroReorderingFeature.cpp} | 14 +++++++------- ...ingFeature.h => SparseHieroReorderingFeature.h} | 4 ++-- 3 files changed, 11 insertions(+), 11 deletions(-) rename moses/FF/{SparseReorderingFeature.cpp => SparseHieroReorderingFeature.cpp} (93%) rename moses/FF/{SparseReorderingFeature.h => SparseHieroReorderingFeature.h} (93%) diff --git a/moses/FF/Factory.cpp b/moses/FF/Factory.cpp index 24efde4c3e..c9a6a3f105 100644 --- a/moses/FF/Factory.cpp +++ b/moses/FF/Factory.cpp @@ -25,7 +25,7 @@ #include "moses/FF/PhrasePairFeature.h" #include "moses/FF/PhraseLengthFeature.h" #include "moses/FF/DistortionScoreProducer.h" -#include "moses/FF/SparseReorderingFeature.h" +#include "moses/FF/SparseHieroReorderingFeature.h" #include "moses/FF/WordPenaltyProducer.h" #include "moses/FF/InputFeature.h" #include "moses/FF/PhrasePenalty.h" @@ -145,7 +145,7 @@ FeatureRegistry::FeatureRegistry() MOSES_FNAME(ControlRecombination); MOSES_FNAME(SkeletonStatelessFF); MOSES_FNAME(SkeletonStatefulFF); - 
MOSES_FNAME(SparseReorderingFeature); + MOSES_FNAME(SparseHieroReorderingFeature); MOSES_FNAME(ExternalFeature); #ifdef HAVE_SYNLM diff --git a/moses/FF/SparseReorderingFeature.cpp b/moses/FF/SparseHieroReorderingFeature.cpp similarity index 93% rename from moses/FF/SparseReorderingFeature.cpp rename to moses/FF/SparseHieroReorderingFeature.cpp index 0203406b07..bdb18c787e 100644 --- a/moses/FF/SparseReorderingFeature.cpp +++ b/moses/FF/SparseHieroReorderingFeature.cpp @@ -7,15 +7,15 @@ #include "util/exception.hh" -#include "SparseReorderingFeature.h" +#include "SparseHieroReorderingFeature.h" using namespace std; namespace Moses { -SparseReorderingFeature::SparseReorderingFeature(const std::string &line) - :StatelessFeatureFunction("SparseReorderingFeature",0, line), +SparseHieroReorderingFeature::SparseHieroReorderingFeature(const std::string &line) + :StatelessFeatureFunction("SparseHieroReorderingFeature",0, line), m_type(SourceCombined), m_sourceFactor(0), m_targetFactor(0), @@ -39,7 +39,7 @@ SparseReorderingFeature::SparseReorderingFeature(const std::string &line) LoadVocabulary(m_targetVocabFile, m_targetVocab); } -void SparseReorderingFeature::SetParameter(const std::string& key, const std::string& value) { +void SparseHieroReorderingFeature::SetParameter(const std::string& key, const std::string& value) { if (key == "input-factor") { m_sourceFactor = Scan(value); } else if (key == "output-factor") { @@ -63,7 +63,7 @@ void SparseReorderingFeature::SetParameter(const std::string& key, const std::st } } -void SparseReorderingFeature::LoadVocabulary(const std::string& filename, Vocab& vocab) +void SparseHieroReorderingFeature::LoadVocabulary(const std::string& filename, Vocab& vocab) { if (filename.empty()) return; ifstream in(filename.c_str()); @@ -75,13 +75,13 @@ void SparseReorderingFeature::LoadVocabulary(const std::string& filename, Vocab& in.close(); } -const Factor* SparseReorderingFeature::GetFactor(const Word& word, const Vocab& vocab, FactorType 
factorType) const { +const Factor* SparseHieroReorderingFeature::GetFactor(const Word& word, const Vocab& vocab, FactorType factorType) const { const Factor* factor = word.GetFactor(factorType); if (vocab.size() && vocab.find(factor) == vocab.end()) return m_otherFactor; return factor; } -void SparseReorderingFeature::EvaluateChart( +void SparseHieroReorderingFeature::EvaluateChart( const ChartHypothesis& cur_hypo , ScoreComponentCollection* accumulator) const { diff --git a/moses/FF/SparseReorderingFeature.h b/moses/FF/SparseHieroReorderingFeature.h similarity index 93% rename from moses/FF/SparseReorderingFeature.h rename to moses/FF/SparseHieroReorderingFeature.h index 200200806d..7059b73158 100644 --- a/moses/FF/SparseReorderingFeature.h +++ b/moses/FF/SparseHieroReorderingFeature.h @@ -12,7 +12,7 @@ namespace Moses { -class SparseReorderingFeature : public StatelessFeatureFunction +class SparseHieroReorderingFeature : public StatelessFeatureFunction { public: enum Type { @@ -21,7 +21,7 @@ class SparseReorderingFeature : public StatelessFeatureFunction SourceRight }; - SparseReorderingFeature(const std::string &line); + SparseHieroReorderingFeature(const std::string &line); bool IsUseable(const FactorMask &mask) const { return true; } From 266e36c4401f3235bc2c1bd292e0fc324ae3f7e4 Mon Sep 17 00:00:00 2001 From: Barry Haddow Date: Wed, 18 Sep 2013 21:58:38 +0100 Subject: [PATCH 12/84] stub out unit test --- moses/FF/SparseHieroReorderingFeature.h | 3 ++ moses/FF/SparseHieroReorderingFeatureTest.cpp | 36 +++++++++++++++++++ moses/Jamfile | 4 +-- 3 files changed, 41 insertions(+), 2 deletions(-) create mode 100644 moses/FF/SparseHieroReorderingFeatureTest.cpp diff --git a/moses/FF/SparseHieroReorderingFeature.h b/moses/FF/SparseHieroReorderingFeature.h index 7059b73158..ec220af036 100644 --- a/moses/FF/SparseHieroReorderingFeature.h +++ b/moses/FF/SparseHieroReorderingFeature.h @@ -6,6 +6,9 @@ #include +#include "moses/Factor.h" +#include "moses/Sentence.h" + 
#include "StatelessFeatureFunction.h" #include "FFState.h" diff --git a/moses/FF/SparseHieroReorderingFeatureTest.cpp b/moses/FF/SparseHieroReorderingFeatureTest.cpp new file mode 100644 index 0000000000..f05355df91 --- /dev/null +++ b/moses/FF/SparseHieroReorderingFeatureTest.cpp @@ -0,0 +1,36 @@ +/*********************************************************************** +Moses - factored phrase-based language decoder +Copyright (C) 2013- University of Edinburgh + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +***********************************************************************/ +#include + +#include + +#include "SparseHieroReorderingFeature.h" + +using namespace Moses; +using namespace std; + +BOOST_AUTO_TEST_SUITE(shrf) + +BOOST_AUTO_TEST_CASE(lexical_rule) +{ + SparseHieroReorderingFeature feature("name=shrf"); + +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/moses/Jamfile b/moses/Jamfile index 26a98c4c98..b344415858 100644 --- a/moses/Jamfile +++ b/moses/Jamfile @@ -64,7 +64,7 @@ lib moses : : #exceptions ThreadPool.cpp SyntacticLanguageModel.cpp - *Test.cpp Mock*.cpp + *Test.cpp Mock*.cpp FF/*Test.cpp FF/Factory.cpp ] headers FF_Factory.o LM//LM TranslationModel/CompactPT//CompactPT synlm ThreadPool rt @@ -74,5 +74,5 @@ alias headers-to-install : [ glob-tree *.h ] ; import testing ; -unit-test 
moses_test : [ glob *Test.cpp Mock*.cpp ] moses headers ..//z ../OnDiskPt//OnDiskPt ..//boost_unit_test_framework ; +unit-test moses_test : [ glob *Test.cpp Mock*.cpp FF/*Test.cpp ] moses headers ..//z ../OnDiskPt//OnDiskPt ..//boost_unit_test_framework ; From 1c00f8d9a7b3302e06d64b33d981d908900a8e2e Mon Sep 17 00:00:00 2001 From: Barry Haddow Date: Mon, 26 May 2014 13:47:32 +0100 Subject: [PATCH 13/84] Should be with other LR classes --- moses/{ => FF/LexicalReordering}/ReorderingStack.cpp | 0 moses/{ => FF/LexicalReordering}/ReorderingStack.h | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename moses/{ => FF/LexicalReordering}/ReorderingStack.cpp (100%) rename moses/{ => FF/LexicalReordering}/ReorderingStack.h (94%) diff --git a/moses/ReorderingStack.cpp b/moses/FF/LexicalReordering/ReorderingStack.cpp similarity index 100% rename from moses/ReorderingStack.cpp rename to moses/FF/LexicalReordering/ReorderingStack.cpp diff --git a/moses/ReorderingStack.h b/moses/FF/LexicalReordering/ReorderingStack.h similarity index 94% rename from moses/ReorderingStack.h rename to moses/FF/LexicalReordering/ReorderingStack.h index 730b17ce31..5a5b80d160 100644 --- a/moses/ReorderingStack.h +++ b/moses/FF/LexicalReordering/ReorderingStack.h @@ -12,7 +12,7 @@ //#include "Phrase.h" //#include "TypeDef.h" //#include "Util.h" -#include "WordsRange.h" +#include "moses/WordsRange.h" namespace Moses { From 1bd851411f24dd0903a1714259d7ce0cebd345df Mon Sep 17 00:00:00 2001 From: Barry Haddow Date: Tue, 27 May 2014 09:52:18 +0100 Subject: [PATCH 14/84] fix includes --- moses/FF/LexicalReordering/LexicalReorderingState.cpp | 2 +- moses/FF/LexicalReordering/LexicalReorderingState.h | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/moses/FF/LexicalReordering/LexicalReorderingState.cpp b/moses/FF/LexicalReordering/LexicalReorderingState.cpp index aa29a4a12c..c13c3ee64d 100644 --- a/moses/FF/LexicalReordering/LexicalReorderingState.cpp +++ 
b/moses/FF/LexicalReordering/LexicalReorderingState.cpp @@ -5,11 +5,11 @@ #include "moses/FF/FFState.h" #include "moses/Hypothesis.h" #include "moses/WordsRange.h" -#include "moses/ReorderingStack.h" #include "moses/TranslationOption.h" #include "LexicalReordering.h" #include "LexicalReorderingState.h" +#include "ReorderingStack.h" namespace Moses { diff --git a/moses/FF/LexicalReordering/LexicalReorderingState.h b/moses/FF/LexicalReordering/LexicalReorderingState.h index 8e237adc1a..a581ae2161 100644 --- a/moses/FF/LexicalReordering/LexicalReorderingState.h +++ b/moses/FF/LexicalReordering/LexicalReorderingState.h @@ -8,10 +8,10 @@ #include "LexicalReordering.h" #include "moses/WordsRange.h" #include "moses/WordsBitmap.h" -#include "moses/ReorderingStack.h" #include "moses/TranslationOption.h" #include "moses/FF/FFState.h" +#include "ReorderingStack.h" namespace Moses { @@ -19,7 +19,6 @@ class LexicalReorderingState; class LexicalReordering; /** Factory class for lexical reordering states - * @todo There's a lot of classes for lexicalized reordering. Perhaps put them in a separate dir */ class LexicalReorderingConfiguration { @@ -99,7 +98,7 @@ class LexicalReorderingState : public FFState // The following is the true direction of the object, which can be Backward or Forward even if the Configuration has Bidirectional. 
LexicalReorderingConfiguration::Direction m_direction; size_t m_offset; - const Scores *m_prevScore; + const TranslationOption *m_prevOption; inline LexicalReorderingState(const LexicalReorderingState *prev, const TranslationOption &topt) : m_configuration(prev->m_configuration), m_direction(prev->m_direction), m_offset(prev->m_offset), From 244d9cd824390c1209b6c91eb59e9af339f83325 Mon Sep 17 00:00:00 2001 From: Barry Haddow Date: Tue, 27 May 2014 11:05:56 +0100 Subject: [PATCH 15/84] Stub out sparse reordering class --- .../LexicalReordering/LexicalReordering.cpp | 7 ++++ .../FF/LexicalReordering/LexicalReordering.h | 8 +++-- .../LexicalReorderingState.cpp | 24 +++++++------ .../LexicalReorderingState.h | 12 ++++--- .../FF/LexicalReordering/SparseReordering.cpp | 25 ++++++++++++++ moses/FF/LexicalReordering/SparseReordering.h | 34 +++++++++++++++++++ 6 files changed, 92 insertions(+), 18 deletions(-) create mode 100644 moses/FF/LexicalReordering/SparseReordering.cpp create mode 100644 moses/FF/LexicalReordering/SparseReordering.h diff --git a/moses/FF/LexicalReordering/LexicalReordering.cpp b/moses/FF/LexicalReordering/LexicalReordering.cpp index 6a2a488d91..10b68913ca 100644 --- a/moses/FF/LexicalReordering/LexicalReordering.cpp +++ b/moses/FF/LexicalReordering/LexicalReordering.cpp @@ -14,6 +14,7 @@ LexicalReordering::LexicalReordering(const std::string &line) { std::cerr << "Initializing LexicalReordering.." 
<< std::endl; + map sparseArgs; for (size_t i = 0; i < m_args.size(); ++i) { const vector &args = m_args[i]; @@ -27,6 +28,8 @@ LexicalReordering::LexicalReordering(const std::string &line) m_factorsE =Tokenize(args[1]); } else if (args[0] == "path") { m_filePath = args[1]; + } else if (args[0].substr(0,7) == "sparse-") { + sparseArgs[args[0].substr(7)] = args[1]; } else { throw "Unknown argument " + args[0]; } @@ -48,6 +51,10 @@ LexicalReordering::LexicalReordering(const std::string &line) default: throw "Unknown conditioning option!"; } + + if (sparseArgs.size()) { + m_sparse.reset(new SparseReordering(sparseArgs)); + } } LexicalReordering::~LexicalReordering() diff --git a/moses/FF/LexicalReordering/LexicalReordering.h b/moses/FF/LexicalReordering/LexicalReordering.h index 4ff0057f09..39d11a582d 100644 --- a/moses/FF/LexicalReordering/LexicalReordering.h +++ b/moses/FF/LexicalReordering/LexicalReordering.h @@ -3,17 +3,20 @@ #include #include +#include #include "moses/Factor.h" #include "moses/Phrase.h" #include "moses/TypeDef.h" #include "moses/Util.h" #include "moses/WordsRange.h" -#include "LexicalReorderingState.h" -#include "LexicalReorderingTable.h" #include "moses/FF/StatefulFeatureFunction.h" #include "util/exception.hh" +#include "LexicalReorderingState.h" +#include "LexicalReorderingTable.h" +#include "SparseReordering.h" + namespace Moses { @@ -79,6 +82,7 @@ class LexicalReordering : public StatefulFeatureFunction //bool m_oneScorePerDirection; std::vector m_factorsE, m_factorsF; std::string m_filePath; + boost::scoped_ptr m_sparse; }; } diff --git a/moses/FF/LexicalReordering/LexicalReorderingState.cpp b/moses/FF/LexicalReordering/LexicalReorderingState.cpp index c13c3ee64d..cef5c8cbb0 100644 --- a/moses/FF/LexicalReordering/LexicalReorderingState.cpp +++ b/moses/FF/LexicalReordering/LexicalReorderingState.cpp @@ -128,7 +128,8 @@ void LexicalReorderingState::CopyScores(Scores& scores, const TranslationOption UTIL_THROW_IF2(m_direction != 
LexicalReorderingConfiguration::Backward && m_direction != LexicalReorderingConfiguration::Forward, "Unknown direction: " << m_direction); const Scores *cachedScores = (m_direction == LexicalReorderingConfiguration::Backward) ? - topt.GetLexReorderingScores(m_configuration.GetScoreProducer()) : m_prevScore; + topt.GetLexReorderingScores(m_configuration.GetScoreProducer()) : + m_prevOption->GetLexReorderingScores(m_configuration.GetScoreProducer()); // No scores available. TODO: Using a good prior distribution would be nicer. if(cachedScores == NULL) @@ -151,23 +152,24 @@ void LexicalReorderingState::ClearScores(Scores& scores) const std::fill(scores.begin() + m_offset, scores.begin() + m_offset + m_configuration.GetNumberOfTypes(), 0); } -int LexicalReorderingState::ComparePrevScores(const Scores *other) const +int LexicalReorderingState::ComparePrevScores(const TranslationOption *other) const { - if(m_prevScore == other) + const Scores* myPrevScores = m_prevOption->GetLexReorderingScores(m_configuration.GetScoreProducer()); + const Scores* otherPrevScores = other->GetLexReorderingScores(m_configuration.GetScoreProducer()); + + if(myPrevScores == otherPrevScores) return 0; // The pointers are NULL if a phrase pair isn't found in the reordering table. 
- if(other == NULL) + if(otherPrevScores == NULL) return -1; - if(m_prevScore == NULL) + if(myPrevScores == NULL) return 1; - const Scores &my = *m_prevScore; - const Scores &their = *other; for(size_t i = m_offset; i < m_offset + m_configuration.GetNumberOfTypes(); i++) - if(my[i] < their[i]) + if((*myPrevScores)[i] < (*otherPrevScores)[i]) return -1; - else if(my[i] > their[i]) + else if((*myPrevScores)[i] > (*otherPrevScores)[i]) return 1; return 0; @@ -193,7 +195,7 @@ int PhraseBasedReorderingState::Compare(const FFState& o) const UTIL_THROW_IF2(other == NULL, "Wrong state type"); if (m_prevRange == other->m_prevRange) { if (m_direction == LexicalReorderingConfiguration::Forward) { - return ComparePrevScores(other->m_prevScore); + return ComparePrevScores(other->m_prevOption); } else { return 0; } @@ -411,7 +413,7 @@ int HierarchicalReorderingForwardState::Compare(const FFState& o) const UTIL_THROW_IF2(other == NULL, "Wrong state type"); if (m_prevRange == other->m_prevRange) { - return ComparePrevScores(other->m_prevScore); + return ComparePrevScores(other->m_prevOption); } else if (m_prevRange < other->m_prevRange) { return -1; } diff --git a/moses/FF/LexicalReordering/LexicalReorderingState.h b/moses/FF/LexicalReordering/LexicalReorderingState.h index a581ae2161..14e3b5189f 100644 --- a/moses/FF/LexicalReordering/LexicalReorderingState.h +++ b/moses/FF/LexicalReordering/LexicalReorderingState.h @@ -4,8 +4,9 @@ #include #include + #include "moses/Hypothesis.h" -#include "LexicalReordering.h" +//#include "LexicalReordering.h" #include "moses/WordsRange.h" #include "moses/WordsBitmap.h" #include "moses/TranslationOption.h" @@ -89,28 +90,29 @@ class LexicalReorderingState : public FFState static LexicalReorderingState* CreateLexicalReorderingState(const std::vector& config, LexicalReorderingConfiguration::Direction dir, const InputType &input); + typedef int ReorderingType; protected: - typedef int ReorderingType; const LexicalReorderingConfiguration 
&m_configuration; // The following is the true direction of the object, which can be Backward or Forward even if the Configuration has Bidirectional. LexicalReorderingConfiguration::Direction m_direction; size_t m_offset; + //forward scores are conditioned on prev option, so need to remember it const TranslationOption *m_prevOption; inline LexicalReorderingState(const LexicalReorderingState *prev, const TranslationOption &topt) : m_configuration(prev->m_configuration), m_direction(prev->m_direction), m_offset(prev->m_offset), - m_prevScore(topt.GetLexReorderingScores(m_configuration.GetScoreProducer())) {} + m_prevOption(&topt) {} inline LexicalReorderingState(const LexicalReorderingConfiguration &config, LexicalReorderingConfiguration::Direction dir, size_t offset) - : m_configuration(config), m_direction(dir), m_offset(offset), m_prevScore(NULL) {} + : m_configuration(config), m_direction(dir), m_offset(offset), m_prevOption(NULL) {} // copy the right scores in the right places, taking into account forward/backward, offset, collapse void CopyScores(Scores& scores, const TranslationOption& topt, ReorderingType reoType) const; void ClearScores(Scores& scores) const; - int ComparePrevScores(const Scores *other) const; + int ComparePrevScores(const TranslationOption *other) const; //constants for the different type of reorderings (corresponding to indexes in the table file) static const ReorderingType M = 0; // monotonic diff --git a/moses/FF/LexicalReordering/SparseReordering.cpp b/moses/FF/LexicalReordering/SparseReordering.cpp new file mode 100644 index 0000000000..6caf1bef23 --- /dev/null +++ b/moses/FF/LexicalReordering/SparseReordering.cpp @@ -0,0 +1,25 @@ +#include "SparseReordering.h" + +using namespace std; + +namespace Moses +{ + +SparseReordering::SparseReordering(const map& config) +{ + for (map::const_iterator i = config.begin(); i != config.end(); ++i) { + cerr << i->first << " " << i->second << endl; + } +} + + +void SparseReordering::AddScores( + 
const TranslationOption& topt, + LexicalReorderingState::ReorderingType reoType, + LexicalReorderingConfiguration::Direction direction, + ScoreComponentCollection* scores) const +{ +} + +} //namespace + diff --git a/moses/FF/LexicalReordering/SparseReordering.h b/moses/FF/LexicalReordering/SparseReordering.h new file mode 100644 index 0000000000..0f30554a22 --- /dev/null +++ b/moses/FF/LexicalReordering/SparseReordering.h @@ -0,0 +1,34 @@ +#ifndef moses_FF_LexicalReordering_SparseReordering_h +#define moses_FF_LexicalReordering_SparseReordering_h + +/** + * Sparse reordering features for phrase-based MT, following Cherry (NAACL, 2013) +**/ + + +#include +#include + +#include "moses/ScoreComponentCollection.h" +#include "LexicalReorderingState.h" + +namespace Moses +{ +class SparseReordering +{ +public: + SparseReordering(const std::map& config); + + void AddScores(const TranslationOption& topt, + LexicalReorderingState::ReorderingType reoType, + LexicalReorderingConfiguration::Direction direction, + ScoreComponentCollection* scores) const ; + +}; + + + +} //namespace + + +#endif From 912c9c1f554a6671f73d02744421966478a1cbd2 Mon Sep 17 00:00:00 2001 From: Barry Haddow Date: Tue, 27 May 2014 13:36:58 +0100 Subject: [PATCH 16/84] Configuration --- moses/FF/LexicalReordering/SparseReordering.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/moses/FF/LexicalReordering/SparseReordering.h b/moses/FF/LexicalReordering/SparseReordering.h index 0f30554a22..226f765449 100644 --- a/moses/FF/LexicalReordering/SparseReordering.h +++ b/moses/FF/LexicalReordering/SparseReordering.h @@ -8,10 +8,25 @@ #include #include +#include #include "moses/ScoreComponentCollection.h" #include "LexicalReorderingState.h" +/** + Configuration of sparse reordering: + + The sparse reordering feature is configured using sparse-* configs in the lexical reordering line. 
+ sparse-words-= -- Features which fire for the words in the list + sparse-clusters-= -- Features which fire for clusters in the list. Format + of cluster file TBD + sparse-phrase -- Add features which depend on the current phrase + sparse-stack -- Add features which depend on the previous phrase, or + top of stack. + sparse-between -- Add features which depend on words between previous phrase + (or top of stack) and current phrase. +**/ + namespace Moses { class SparseReordering @@ -24,6 +39,9 @@ class SparseReordering LexicalReorderingConfiguration::Direction direction, ScoreComponentCollection* scores) const ; +private: + + }; From 10e26ef00d308d7abf1cd1ad7a76bd33eb3a0c43 Mon Sep 17 00:00:00 2001 From: Barry Haddow Date: Fri, 30 May 2014 11:27:59 +0100 Subject: [PATCH 17/84] config of sparse reordering --- .../FF/LexicalReordering/SparseReordering.cpp | 39 ++++++++++++++++++- moses/FF/LexicalReordering/SparseReordering.h | 13 +++++-- 2 files changed, 48 insertions(+), 4 deletions(-) diff --git a/moses/FF/LexicalReordering/SparseReordering.cpp b/moses/FF/LexicalReordering/SparseReordering.cpp index 6caf1bef23..0405ff29ed 100644 --- a/moses/FF/LexicalReordering/SparseReordering.cpp +++ b/moses/FF/LexicalReordering/SparseReordering.cpp @@ -1,5 +1,11 @@ +#include + +#include "moses/Util.h" +#include "util/exception.hh" + #include "SparseReordering.h" + using namespace std; namespace Moses @@ -7,11 +13,42 @@ namespace Moses SparseReordering::SparseReordering(const map& config) { + static const string kSource= "source"; + static const string kTarget = "target"; for (map::const_iterator i = config.begin(); i != config.end(); ++i) { - cerr << i->first << " " << i->second << endl; + vector fields = Tokenize(i->first, "-"); + if (fields[0] == "words") { + UTIL_THROW_IF(!(fields.size() == 3), util::Exception, "Sparse reordering word list name should be sparse-words-(source|target)-"); + if (fields[1] == kSource) { + ReadWordList(i->second,fields[2],&m_sourceWordLists); + } 
else if (fields[1] == kTarget) { + ReadWordList(i->second,fields[2],&m_targetWordLists); + } else { + UTIL_THROW(util::Exception, "Sparse reordering requires source or target, not " << fields[1]); + } + } else if (fields[0] == "clusters") { + UTIL_THROW(util::Exception, "Sparse reordering does not yet support clusters" << i->first); + } else if (fields[0] == "phrase") { + m_usePhrase = true; + } else if (fields[0] == "stack") { + m_useStack = true; + } else if (fields[0] == "between") { + m_useBetween = true; + } else { + UTIL_THROW(util::Exception, "Unable to parse sparse reordering option: " << i->first); + } } } +void SparseReordering::ReadWordList(const string& filename, const string& id, vector* pWordLists) { + ifstream fh(filename.c_str()); + string line; + pWordLists->push_back(WordList()); + pWordLists->back().first = id; + while (getline(fh,line)) { + pWordLists->back().second.insert(line); + } +} void SparseReordering::AddScores( const TranslationOption& topt, diff --git a/moses/FF/LexicalReordering/SparseReordering.h b/moses/FF/LexicalReordering/SparseReordering.h index 226f765449..a6db663e0f 100644 --- a/moses/FF/LexicalReordering/SparseReordering.h +++ b/moses/FF/LexicalReordering/SparseReordering.h @@ -17,8 +17,8 @@ Configuration of sparse reordering: The sparse reordering feature is configured using sparse-* configs in the lexical reordering line. - sparse-words-= -- Features which fire for the words in the list - sparse-clusters-= -- Features which fire for clusters in the list. Format + sparse-words-(source|target)-= -- Features which fire for the words in the list + sparse-clusters-(source|target)-= -- Features which fire for clusters in the list. 
Format of cluster file TBD sparse-phrase -- Add features which depend on the current phrase sparse-stack -- Add features which depend on the previous phrase, or @@ -40,7 +40,14 @@ class SparseReordering ScoreComponentCollection* scores) const ; private: - + typedef std::pair > WordList; //id and list + std::vector m_sourceWordLists; + std::vector m_targetWordLists; + bool m_usePhrase; + bool m_useBetween; + bool m_useStack; + + void ReadWordList(const std::string& filename, const std::string& id, std::vector* pWordLists); }; From 18d1bceea099a17009b93605d41ebfea448727a0 Mon Sep 17 00:00:00 2001 From: Barry Haddow Date: Fri, 6 Jun 2014 14:20:23 +0100 Subject: [PATCH 18/84] Move sparse reordering object to LR config --- moses/FF/LexicalReordering/LexicalReordering.cpp | 10 +++------- moses/FF/LexicalReordering/LexicalReordering.h | 5 ++--- moses/FF/LexicalReordering/LexicalReorderingState.cpp | 7 +++++++ moses/FF/LexicalReordering/LexicalReorderingState.h | 5 +++++ 4 files changed, 17 insertions(+), 10 deletions(-) diff --git a/moses/FF/LexicalReordering/LexicalReordering.cpp b/moses/FF/LexicalReordering/LexicalReordering.cpp index 10b68913ca..c5daee95bb 100644 --- a/moses/FF/LexicalReordering/LexicalReordering.cpp +++ b/moses/FF/LexicalReordering/LexicalReordering.cpp @@ -19,7 +19,7 @@ LexicalReordering::LexicalReordering(const std::string &line) const vector &args = m_args[i]; if (args[0] == "type") { - m_configuration = new LexicalReorderingConfiguration(args[1]); + m_configuration.reset(new LexicalReorderingConfiguration(args[1])); m_configuration->SetScoreProducer(this); m_modelTypeString = m_configuration->GetModelString(); } else if (args[0] == "input-factor") { @@ -52,20 +52,16 @@ LexicalReordering::LexicalReordering(const std::string &line) throw "Unknown conditioning option!"; } - if (sparseArgs.size()) { - m_sparse.reset(new SparseReordering(sparseArgs)); - } + m_configuration->ConfigureSparse(sparseArgs); } LexicalReordering::~LexicalReordering() { - 
delete m_table; - delete m_configuration; } void LexicalReordering::Load() { - m_table = LexicalReorderingTable::LoadAvailable(m_filePath, m_factorsF, m_factorsE, std::vector()); + m_table.reset(LexicalReorderingTable::LoadAvailable(m_filePath, m_factorsF, m_factorsE, std::vector())); } Scores LexicalReordering::GetProb(const Phrase& f, const Phrase& e) const diff --git a/moses/FF/LexicalReordering/LexicalReordering.h b/moses/FF/LexicalReordering/LexicalReordering.h index 39d11a582d..6255987a4f 100644 --- a/moses/FF/LexicalReordering/LexicalReordering.h +++ b/moses/FF/LexicalReordering/LexicalReordering.h @@ -72,17 +72,16 @@ class LexicalReordering : public StatefulFeatureFunction bool DecodeDirection(std::string s); bool DecodeNumFeatureFunctions(std::string s); - LexicalReorderingConfiguration *m_configuration; + boost::scoped_ptr m_configuration; std::string m_modelTypeString; std::vector m_modelType; - LexicalReorderingTable* m_table; + boost::scoped_ptr m_table; //std::vector m_direction; std::vector m_condition; //std::vector m_scoreOffset; //bool m_oneScorePerDirection; std::vector m_factorsE, m_factorsF; std::string m_filePath; - boost::scoped_ptr m_sparse; }; } diff --git a/moses/FF/LexicalReordering/LexicalReorderingState.cpp b/moses/FF/LexicalReordering/LexicalReorderingState.cpp index cef5c8cbb0..d334749431 100644 --- a/moses/FF/LexicalReordering/LexicalReorderingState.cpp +++ b/moses/FF/LexicalReordering/LexicalReorderingState.cpp @@ -38,6 +38,13 @@ size_t LexicalReorderingConfiguration::GetNumScoreComponents() const } } +void LexicalReorderingConfiguration::ConfigureSparse(const std::map& sparseArgs) +{ + if (sparseArgs.size()) { + m_sparse.reset(new SparseReordering(sparseArgs)); + } +} + void LexicalReorderingConfiguration::SetAdditionalScoreComponents(size_t number) { m_additionalScoreComponents = number; diff --git a/moses/FF/LexicalReordering/LexicalReorderingState.h b/moses/FF/LexicalReordering/LexicalReorderingState.h index 
14e3b5189f..5c179c39cf 100644 --- a/moses/FF/LexicalReordering/LexicalReorderingState.h +++ b/moses/FF/LexicalReordering/LexicalReorderingState.h @@ -4,6 +4,7 @@ #include #include +#include #include "moses/Hypothesis.h" //#include "LexicalReordering.h" @@ -18,6 +19,7 @@ namespace Moses { class LexicalReorderingState; class LexicalReordering; +class SparseReordering; /** Factory class for lexical reordering states */ @@ -31,6 +33,8 @@ class LexicalReorderingConfiguration LexicalReorderingConfiguration(const std::string &modelType); + void ConfigureSparse(const std::map& sparseArgs); + LexicalReorderingState *CreateLexicalReorderingState(const InputType &input) const; size_t GetNumScoreComponents() const; @@ -79,6 +83,7 @@ class LexicalReorderingConfiguration Direction m_direction; Condition m_condition; size_t m_additionalScoreComponents; + boost::scoped_ptr m_sparse; }; //! Abstract class for lexical reordering model states From 4aa4fe0a046921f3850534847e239bd8879b54ad Mon Sep 17 00:00:00 2001 From: Barry Haddow Date: Fri, 6 Jun 2014 20:25:45 +0100 Subject: [PATCH 19/84] Pass scc, not scores --- .../LexicalReordering/LexicalReordering.cpp | 5 +- .../LexicalReorderingState.cpp | 46 ++++++++----------- .../LexicalReorderingState.h | 14 +++--- 3 files changed, 27 insertions(+), 38 deletions(-) diff --git a/moses/FF/LexicalReordering/LexicalReordering.cpp b/moses/FF/LexicalReordering/LexicalReordering.cpp index c5daee95bb..6c73863600 100644 --- a/moses/FF/LexicalReordering/LexicalReordering.cpp +++ b/moses/FF/LexicalReordering/LexicalReordering.cpp @@ -73,11 +73,8 @@ FFState* LexicalReordering::Evaluate(const Hypothesis& hypo, const FFState* prev_state, ScoreComponentCollection* out) const { - Scores score(GetNumScoreComponents(), 0); const LexicalReorderingState *prev = dynamic_cast(prev_state); - LexicalReorderingState *next_state = prev->Expand(hypo.GetTranslationOption(), score); - - out->PlusEquals(this, score); + LexicalReorderingState *next_state = 
prev->Expand(hypo.GetTranslationOption(), out); return next_state; } diff --git a/moses/FF/LexicalReordering/LexicalReorderingState.cpp b/moses/FF/LexicalReordering/LexicalReorderingState.cpp index d334749431..c6782974ba 100644 --- a/moses/FF/LexicalReordering/LexicalReorderingState.cpp +++ b/moses/FF/LexicalReordering/LexicalReorderingState.cpp @@ -129,7 +129,7 @@ LexicalReorderingState *LexicalReorderingConfiguration::CreateLexicalReorderingS return new BidirectionalReorderingState(*this, bwd, fwd, 0); } -void LexicalReorderingState::CopyScores(Scores& scores, const TranslationOption &topt, ReorderingType reoType) const +void LexicalReorderingState::CopyScores(ScoreComponentCollection* accum, const TranslationOption &topt, ReorderingType reoType) const { // don't call this on a bidirectional object UTIL_THROW_IF2(m_direction != LexicalReorderingConfiguration::Backward && m_direction != LexicalReorderingConfiguration::Forward, @@ -142,6 +142,8 @@ void LexicalReorderingState::CopyScores(Scores& scores, const TranslationOption if(cachedScores == NULL) return; + Scores scores(m_configuration.GetScoreProducer()->GetNumScoreComponents(),0); + const Scores &scoreSet = *cachedScores; if(m_configuration.CollapseScores()) scores[m_offset] = scoreSet[m_offset + reoType]; @@ -149,15 +151,9 @@ void LexicalReorderingState::CopyScores(Scores& scores, const TranslationOption std::fill(scores.begin() + m_offset, scores.begin() + m_offset + m_configuration.GetNumberOfTypes(), 0); scores[m_offset + reoType] = scoreSet[m_offset + reoType]; } + accum->PlusEquals(m_configuration.GetScoreProducer(), scores); } -void LexicalReorderingState::ClearScores(Scores& scores) const -{ - if(m_configuration.CollapseScores()) - scores[m_offset] = 0; - else - std::fill(scores.begin() + m_offset, scores.begin() + m_offset + m_configuration.GetNumberOfTypes(), 0); -} int LexicalReorderingState::ComparePrevScores(const TranslationOption *other) const { @@ -212,27 +208,23 @@ int 
PhraseBasedReorderingState::Compare(const FFState& o) const return 1; } -LexicalReorderingState* PhraseBasedReorderingState::Expand(const TranslationOption& topt, Scores& scores) const +LexicalReorderingState* PhraseBasedReorderingState::Expand(const TranslationOption& topt, ScoreComponentCollection* scores) const { ReorderingType reoType; const WordsRange currWordsRange = topt.GetSourceWordsRange(); const LexicalReorderingConfiguration::ModelType modelType = m_configuration.GetModelType(); - if (m_direction == LexicalReorderingConfiguration::Forward && m_first) { - ClearScores(scores); - } else { - if (!m_first || m_useFirstBackwardScore) { - if (modelType == LexicalReorderingConfiguration::MSD) { - reoType = GetOrientationTypeMSD(currWordsRange); - } else if (modelType == LexicalReorderingConfiguration::MSLR) { - reoType = GetOrientationTypeMSLR(currWordsRange); - } else if (modelType == LexicalReorderingConfiguration::Monotonic) { - reoType = GetOrientationTypeMonotonic(currWordsRange); - } else { - reoType = GetOrientationTypeLeftRight(currWordsRange); - } - CopyScores(scores, topt, reoType); + if ((m_direction != LexicalReorderingConfiguration::Forward && m_useFirstBackwardScore) || !m_first) { + if (modelType == LexicalReorderingConfiguration::MSD) { + reoType = GetOrientationTypeMSD(currWordsRange); + } else if (modelType == LexicalReorderingConfiguration::MSLR) { + reoType = GetOrientationTypeMSLR(currWordsRange); + } else if (modelType == LexicalReorderingConfiguration::Monotonic) { + reoType = GetOrientationTypeMonotonic(currWordsRange); + } else { + reoType = GetOrientationTypeLeftRight(currWordsRange); } + CopyScores(scores, topt, reoType); } return new PhraseBasedReorderingState(this, topt); @@ -310,7 +302,7 @@ int BidirectionalReorderingState::Compare(const FFState& o) const return m_forward->Compare(*other.m_forward); } -LexicalReorderingState* BidirectionalReorderingState::Expand(const TranslationOption& topt, Scores& scores) const 
+LexicalReorderingState* BidirectionalReorderingState::Expand(const TranslationOption& topt, ScoreComponentCollection* scores) const { LexicalReorderingState *newbwd = m_backward->Expand(topt, scores); LexicalReorderingState *newfwd = m_forward->Expand(topt, scores); @@ -334,7 +326,7 @@ int HierarchicalReorderingBackwardState::Compare(const FFState& o) const return m_reoStack.Compare(other.m_reoStack); } -LexicalReorderingState* HierarchicalReorderingBackwardState::Expand(const TranslationOption& topt, Scores& scores) const +LexicalReorderingState* HierarchicalReorderingBackwardState::Expand(const TranslationOption& topt, ScoreComponentCollection* scores) const { HierarchicalReorderingBackwardState* nextState = new HierarchicalReorderingBackwardState(this, topt, m_reoStack); @@ -438,7 +430,7 @@ int HierarchicalReorderingForwardState::Compare(const FFState& o) const // dright: if the next phrase follows the conditioning phrase and other stuff comes in between // dleft: if the next phrase precedes the conditioning phrase and other stuff comes in between -LexicalReorderingState* HierarchicalReorderingForwardState::Expand(const TranslationOption& topt, Scores& scores) const +LexicalReorderingState* HierarchicalReorderingForwardState::Expand(const TranslationOption& topt, ScoreComponentCollection* scores) const { const LexicalReorderingConfiguration::ModelType modelType = m_configuration.GetModelType(); const WordsRange currWordsRange = topt.GetSourceWordsRange(); @@ -449,7 +441,7 @@ LexicalReorderingState* HierarchicalReorderingForwardState::Expand(const Transla ReorderingType reoType; if (m_first) { - ClearScores(scores); + } else { if (modelType == LexicalReorderingConfiguration::MSD) { reoType = GetOrientationTypeMSD(currWordsRange, coverage); diff --git a/moses/FF/LexicalReordering/LexicalReorderingState.h b/moses/FF/LexicalReordering/LexicalReorderingState.h index 5c179c39cf..e8d9269b87 100644 --- a/moses/FF/LexicalReordering/LexicalReorderingState.h +++ 
b/moses/FF/LexicalReordering/LexicalReorderingState.h @@ -8,6 +8,7 @@ #include "moses/Hypothesis.h" //#include "LexicalReordering.h" +#include "moses/ScoreComponentCollection.h" #include "moses/WordsRange.h" #include "moses/WordsBitmap.h" #include "moses/TranslationOption.h" @@ -91,7 +92,7 @@ class LexicalReorderingState : public FFState { public: virtual int Compare(const FFState& o) const = 0; - virtual LexicalReorderingState* Expand(const TranslationOption& hypo, Scores& scores) const = 0; + virtual LexicalReorderingState* Expand(const TranslationOption& hypo, ScoreComponentCollection* scores) const = 0; static LexicalReorderingState* CreateLexicalReorderingState(const std::vector& config, LexicalReorderingConfiguration::Direction dir, const InputType &input); @@ -115,8 +116,7 @@ class LexicalReorderingState : public FFState : m_configuration(config), m_direction(dir), m_offset(offset), m_prevOption(NULL) {} // copy the right scores in the right places, taking into account forward/backward, offset, collapse - void CopyScores(Scores& scores, const TranslationOption& topt, ReorderingType reoType) const; - void ClearScores(Scores& scores) const; + void CopyScores(ScoreComponentCollection* scores, const TranslationOption& topt, ReorderingType reoType) const; int ComparePrevScores(const TranslationOption *other) const; //constants for the different type of reorderings (corresponding to indexes in the table file) @@ -146,7 +146,7 @@ class BidirectionalReorderingState : public LexicalReorderingState } virtual int Compare(const FFState& o) const; - virtual LexicalReorderingState* Expand(const TranslationOption& topt, Scores& scores) const; + virtual LexicalReorderingState* Expand(const TranslationOption& topt, ScoreComponentCollection* scores) const; }; //! 
State for the standard Moses implementation of lexical reordering models @@ -162,7 +162,7 @@ class PhraseBasedReorderingState : public LexicalReorderingState PhraseBasedReorderingState(const PhraseBasedReorderingState *prev, const TranslationOption &topt); virtual int Compare(const FFState& o) const; - virtual LexicalReorderingState* Expand(const TranslationOption& topt, Scores& scores) const; + virtual LexicalReorderingState* Expand(const TranslationOption& topt, ScoreComponentCollection* scores) const; ReorderingType GetOrientationTypeMSD(WordsRange currRange) const; ReorderingType GetOrientationTypeMSLR(WordsRange currRange) const; @@ -183,7 +183,7 @@ class HierarchicalReorderingBackwardState : public LexicalReorderingState const TranslationOption &topt, ReorderingStack reoStack); virtual int Compare(const FFState& o) const; - virtual LexicalReorderingState* Expand(const TranslationOption& hypo, Scores& scores) const; + virtual LexicalReorderingState* Expand(const TranslationOption& hypo, ScoreComponentCollection* scores) const; private: ReorderingType GetOrientationTypeMSD(int reoDistance) const; @@ -206,7 +206,7 @@ class HierarchicalReorderingForwardState : public LexicalReorderingState HierarchicalReorderingForwardState(const HierarchicalReorderingForwardState *prev, const TranslationOption &topt); virtual int Compare(const FFState& o) const; - virtual LexicalReorderingState* Expand(const TranslationOption& hypo, Scores& scores) const; + virtual LexicalReorderingState* Expand(const TranslationOption& hypo, ScoreComponentCollection* scores) const; private: ReorderingType GetOrientationTypeMSD(WordsRange currRange, WordsBitmap coverage) const; From 3dec0abf0acdf7699e9643650ccae22f5a4f5785 Mon Sep 17 00:00:00 2001 From: Barry Haddow Date: Fri, 6 Jun 2014 21:08:09 +0100 Subject: [PATCH 20/84] Call sparse reordering --- .../LexicalReorderingState.cpp | 36 ++++++++++--------- .../LexicalReorderingState.h | 4 +++ .../FF/LexicalReordering/SparseReordering.cpp | 5 +-- 
moses/FF/LexicalReordering/SparseReordering.h | 7 ++-- 4 files changed, 30 insertions(+), 22 deletions(-) diff --git a/moses/FF/LexicalReordering/LexicalReorderingState.cpp b/moses/FF/LexicalReordering/LexicalReorderingState.cpp index c6782974ba..63f47c8855 100644 --- a/moses/FF/LexicalReordering/LexicalReorderingState.cpp +++ b/moses/FF/LexicalReordering/LexicalReorderingState.cpp @@ -134,24 +134,26 @@ void LexicalReorderingState::CopyScores(ScoreComponentCollection* accum, const // don't call this on a bidirectional object UTIL_THROW_IF2(m_direction != LexicalReorderingConfiguration::Backward && m_direction != LexicalReorderingConfiguration::Forward, "Unknown direction: " << m_direction); - const Scores *cachedScores = (m_direction == LexicalReorderingConfiguration::Backward) ? - topt.GetLexReorderingScores(m_configuration.GetScoreProducer()) : - m_prevOption->GetLexReorderingScores(m_configuration.GetScoreProducer()); - - // No scores available. TODO: Using a good prior distribution would be nicer. 
- if(cachedScores == NULL) - return; - - Scores scores(m_configuration.GetScoreProducer()->GetNumScoreComponents(),0); - - const Scores &scoreSet = *cachedScores; - if(m_configuration.CollapseScores()) - scores[m_offset] = scoreSet[m_offset + reoType]; - else { - std::fill(scores.begin() + m_offset, scores.begin() + m_offset + m_configuration.GetNumberOfTypes(), 0); - scores[m_offset + reoType] = scoreSet[m_offset + reoType]; + const TranslationOption* relevantOpt = &topt; + if (m_direction != LexicalReorderingConfiguration::Backward) relevantOpt = m_prevOption; + const Scores *cachedScores = relevantOpt->GetLexReorderingScores(m_configuration.GetScoreProducer()); + + if(cachedScores) { + Scores scores(m_configuration.GetScoreProducer()->GetNumScoreComponents(),0); + + const Scores &scoreSet = *cachedScores; + if(m_configuration.CollapseScores()) + scores[m_offset] = scoreSet[m_offset + reoType]; + else { + std::fill(scores.begin() + m_offset, scores.begin() + m_offset + m_configuration.GetNumberOfTypes(), 0); + scores[m_offset + reoType] = scoreSet[m_offset + reoType]; + } + accum->PlusEquals(m_configuration.GetScoreProducer(), scores); } - accum->PlusEquals(m_configuration.GetScoreProducer(), scores); + + const SparseReordering* sparse = m_configuration.GetSparseReordering(); + if (sparse) sparse->CopyScores(*relevantOpt, reoType, m_direction, accum); + } diff --git a/moses/FF/LexicalReordering/LexicalReorderingState.h b/moses/FF/LexicalReordering/LexicalReorderingState.h index e8d9269b87..058ae01c40 100644 --- a/moses/FF/LexicalReordering/LexicalReorderingState.h +++ b/moses/FF/LexicalReordering/LexicalReorderingState.h @@ -67,6 +67,10 @@ class LexicalReorderingConfiguration return m_collapseScores; } + const SparseReordering* GetSparseReordering() const { + return m_sparse.get(); + } + private: void SetScoreProducer(LexicalReordering* scoreProducer) { m_scoreProducer = scoreProducer; diff --git a/moses/FF/LexicalReordering/SparseReordering.cpp 
b/moses/FF/LexicalReordering/SparseReordering.cpp index 0405ff29ed..14a3b2667c 100644 --- a/moses/FF/LexicalReordering/SparseReordering.cpp +++ b/moses/FF/LexicalReordering/SparseReordering.cpp @@ -50,12 +50,13 @@ void SparseReordering::ReadWordList(const string& filename, const string& id, ve } } -void SparseReordering::AddScores( - const TranslationOption& topt, +void SparseReordering::CopyScores( + const TranslationOption& topt, LexicalReorderingState::ReorderingType reoType, LexicalReorderingConfiguration::Direction direction, ScoreComponentCollection* scores) const { + //std::cerr << "SR " << topt << " " << reoType << " " << direction << std::endl; } } //namespace diff --git a/moses/FF/LexicalReordering/SparseReordering.h b/moses/FF/LexicalReordering/SparseReordering.h index a6db663e0f..0a5803e1bf 100644 --- a/moses/FF/LexicalReordering/SparseReordering.h +++ b/moses/FF/LexicalReordering/SparseReordering.h @@ -20,9 +20,9 @@ sparse-words-(source|target)-= -- Features which fire for the words in the list sparse-clusters-(source|target)-= -- Features which fire for clusters in the list. Format of cluster file TBD - sparse-phrase -- Add features which depend on the current phrase + sparse-phrase -- Add features which depend on the current phrase (backward) sparse-stack -- Add features which depend on the previous phrase, or - top of stack. + top of stack. (forward) sparse-between -- Add features which depend on words between previous phrase (or top of stack) and current phrase. 
**/ @@ -34,7 +34,8 @@ class SparseReordering public: SparseReordering(const std::map& config); - void AddScores(const TranslationOption& topt, + //If direction is backward topt is the current option, otherwise the previous + void CopyScores(const TranslationOption& topt, LexicalReorderingState::ReorderingType reoType, LexicalReorderingConfiguration::Direction direction, ScoreComponentCollection* scores) const ; From a5e5a6590b0d3a3a92b183e3cf7b3401ecf18589 Mon Sep 17 00:00:00 2001 From: Barry Haddow Date: Mon, 9 Jun 2014 22:17:05 +0100 Subject: [PATCH 21/84] basic implementation of non-hierarchical sparse features --- .../LexicalReordering/LexicalReordering.cpp | 2 +- .../LexicalReorderingState.cpp | 5 +- .../LexicalReorderingState.h | 2 +- .../FF/LexicalReordering/SparseReordering.cpp | 49 ++++++++++++++++++- moses/FF/LexicalReordering/SparseReordering.h | 11 ++++- 5 files changed, 61 insertions(+), 8 deletions(-) diff --git a/moses/FF/LexicalReordering/LexicalReordering.cpp b/moses/FF/LexicalReordering/LexicalReordering.cpp index 6c73863600..d3e52c23c3 100644 --- a/moses/FF/LexicalReordering/LexicalReordering.cpp +++ b/moses/FF/LexicalReordering/LexicalReordering.cpp @@ -52,7 +52,7 @@ LexicalReordering::LexicalReordering(const std::string &line) throw "Unknown conditioning option!"; } - m_configuration->ConfigureSparse(sparseArgs); + m_configuration->ConfigureSparse(sparseArgs, this); } LexicalReordering::~LexicalReordering() diff --git a/moses/FF/LexicalReordering/LexicalReorderingState.cpp b/moses/FF/LexicalReordering/LexicalReorderingState.cpp index 63f47c8855..349f06af76 100644 --- a/moses/FF/LexicalReordering/LexicalReorderingState.cpp +++ b/moses/FF/LexicalReordering/LexicalReorderingState.cpp @@ -38,10 +38,11 @@ size_t LexicalReorderingConfiguration::GetNumScoreComponents() const } } -void LexicalReorderingConfiguration::ConfigureSparse(const std::map& sparseArgs) +void LexicalReorderingConfiguration::ConfigureSparse + (const std::map& sparseArgs, const 
LexicalReordering* producer) { if (sparseArgs.size()) { - m_sparse.reset(new SparseReordering(sparseArgs)); + m_sparse.reset(new SparseReordering(sparseArgs, producer)); } } diff --git a/moses/FF/LexicalReordering/LexicalReorderingState.h b/moses/FF/LexicalReordering/LexicalReorderingState.h index 058ae01c40..e37ea71783 100644 --- a/moses/FF/LexicalReordering/LexicalReorderingState.h +++ b/moses/FF/LexicalReordering/LexicalReorderingState.h @@ -34,7 +34,7 @@ class LexicalReorderingConfiguration LexicalReorderingConfiguration(const std::string &modelType); - void ConfigureSparse(const std::map& sparseArgs); + void ConfigureSparse(const std::map& sparseArgs, const LexicalReordering* producer); LexicalReorderingState *CreateLexicalReorderingState(const InputType &input) const; diff --git a/moses/FF/LexicalReordering/SparseReordering.cpp b/moses/FF/LexicalReordering/SparseReordering.cpp index 14a3b2667c..443cf49d96 100644 --- a/moses/FF/LexicalReordering/SparseReordering.cpp +++ b/moses/FF/LexicalReordering/SparseReordering.cpp @@ -1,8 +1,11 @@ #include +#include "moses/FactorCollection.h" +#include "moses/InputPath.h" #include "moses/Util.h" #include "util/exception.hh" +#include "LexicalReordering.h" #include "SparseReordering.h" @@ -11,7 +14,8 @@ using namespace std; namespace Moses { -SparseReordering::SparseReordering(const map& config) +SparseReordering::SparseReordering(const map& config, const LexicalReordering* producer) + : m_producer(producer) { static const string kSource= "source"; static const string kTarget = "target"; @@ -42,14 +46,32 @@ SparseReordering::SparseReordering(const map& config) void SparseReordering::ReadWordList(const string& filename, const string& id, vector* pWordLists) { ifstream fh(filename.c_str()); + UTIL_THROW_IF(!fh, util::Exception, "Unable to open: " << filename); string line; pWordLists->push_back(WordList()); pWordLists->back().first = id; while (getline(fh,line)) { - pWordLists->back().second.insert(line); + //TODO: 
StringPiece + const Factor* factor = FactorCollection::Instance().AddFactor(line); + pWordLists->back().second.insert(factor); } } +void SparseReordering::AddFeatures( + const string& type, const Word& word, const string& position, const WordList& words, + LexicalReorderingState::ReorderingType reoType, + ScoreComponentCollection* scores) const { + + //TODO: Precalculate all feature names + static string kSep = "-"; + const Factor* wordFactor = word.GetFactor(0); + if (words.second.find(wordFactor) == words.second.end()) return; + ostringstream buf; + buf << type << kSep << position << kSep << words.first << kSep << wordFactor->GetString() << kSep << reoType; + scores->PlusEquals(m_producer, buf.str(), 1.0); + +} + void SparseReordering::CopyScores( const TranslationOption& topt, LexicalReorderingState::ReorderingType reoType, @@ -57,6 +79,29 @@ void SparseReordering::CopyScores( ScoreComponentCollection* scores) const { //std::cerr << "SR " << topt << " " << reoType << " " << direction << std::endl; + const string kPhrase = "phr"; //phrase (backward) + const string kStack = "stk"; //stack (forward) + + const string* type = &kPhrase; + //TODO: bidirectional? 
+ if (direction == LexicalReorderingConfiguration::Forward) { + if (!m_useStack) return; + type = &kStack; + } else if (direction == LexicalReorderingConfiguration::Backward && !m_usePhrase) { + return; + } + for (vector::const_iterator i = m_sourceWordLists.begin(); i != m_sourceWordLists.end(); ++i) { + const Phrase& sourcePhrase = topt.GetInputPath().GetPhrase(); + AddFeatures(*type, sourcePhrase.GetWord(0), "src.first", *i, reoType, scores); + AddFeatures(*type, sourcePhrase.GetWord(sourcePhrase.GetSize()-1), "src.last", *i, reoType, scores); + } + for (vector::const_iterator i = m_targetWordLists.begin(); i != m_targetWordLists.end(); ++i) { + const Phrase& targetPhrase = topt.GetTargetPhrase(); + AddFeatures(*type, targetPhrase.GetWord(0), "tgt.first", *i, reoType, scores); + AddFeatures(*type, targetPhrase.GetWord(targetPhrase.GetSize()-1), "tgt.last", *i, reoType, scores); + } + + } } //namespace diff --git a/moses/FF/LexicalReordering/SparseReordering.h b/moses/FF/LexicalReordering/SparseReordering.h index 0a5803e1bf..ec6c3c04e3 100644 --- a/moses/FF/LexicalReordering/SparseReordering.h +++ b/moses/FF/LexicalReordering/SparseReordering.h @@ -10,6 +10,8 @@ #include #include +#include + #include "moses/ScoreComponentCollection.h" #include "LexicalReorderingState.h" @@ -32,7 +34,7 @@ namespace Moses class SparseReordering { public: - SparseReordering(const std::map& config); + SparseReordering(const std::map& config, const LexicalReordering* producer); //If direction is backward topt is the current option, otherwise the previous void CopyScores(const TranslationOption& topt, @@ -41,7 +43,8 @@ class SparseReordering ScoreComponentCollection* scores) const ; private: - typedef std::pair > WordList; //id and list + const LexicalReordering* m_producer; + typedef std::pair > WordList; //id and list std::vector m_sourceWordLists; std::vector m_targetWordLists; bool m_usePhrase; @@ -49,6 +52,10 @@ class SparseReordering bool m_useStack; void ReadWordList(const 
std::string& filename, const std::string& id, std::vector* pWordLists); + void AddFeatures( + const std::string& type, const Word& word, const std::string& position, const WordList& words, + LexicalReorderingState::ReorderingType reoType, + ScoreComponentCollection* scores) const; }; From 91ccf8ef72356f1f61c42b3e0dd9a56aac32c451 Mon Sep 17 00:00:00 2001 From: Barry Haddow Date: Tue, 10 Jun 2014 10:23:48 +0100 Subject: [PATCH 22/84] bidirectional case --- moses/FF/LexicalReordering/SparseReordering.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/moses/FF/LexicalReordering/SparseReordering.cpp b/moses/FF/LexicalReordering/SparseReordering.cpp index 443cf49d96..677698400f 100644 --- a/moses/FF/LexicalReordering/SparseReordering.cpp +++ b/moses/FF/LexicalReordering/SparseReordering.cpp @@ -82,13 +82,16 @@ void SparseReordering::CopyScores( const string kPhrase = "phr"; //phrase (backward) const string kStack = "stk"; //stack (forward) - const string* type = &kPhrase; - //TODO: bidirectional? 
+ const string* type = NULL;// &kPhrase; if (direction == LexicalReorderingConfiguration::Forward) { if (!m_useStack) return; type = &kStack; - } else if (direction == LexicalReorderingConfiguration::Backward && !m_usePhrase) { - return; + } else if (direction == LexicalReorderingConfiguration::Backward) { + if (!m_usePhrase) return; + type = &kPhrase; + } else { + //Shouldn't be called for bidirectional + assert(!"Shouldn't call CopyScores() with bidirectional direction"); } for (vector::const_iterator i = m_sourceWordLists.begin(); i != m_sourceWordLists.end(); ++i) { const Phrase& sourcePhrase = topt.GetInputPath().GetPhrase(); From 2785989e219d217bcf02927506ef12ff43100aaa Mon Sep 17 00:00:00 2001 From: Barry Haddow Date: Thu, 12 Jun 2014 21:37:18 +0100 Subject: [PATCH 23/84] precalculation of feature names --- .../LexicalReorderingState.h | 2 + .../FF/LexicalReordering/SparseReordering.cpp | 98 +++++++++++++++---- moses/FF/LexicalReordering/SparseReordering.h | 62 +++++++++++- 3 files changed, 137 insertions(+), 25 deletions(-) diff --git a/moses/FF/LexicalReordering/LexicalReorderingState.h b/moses/FF/LexicalReordering/LexicalReorderingState.h index e37ea71783..cf91eaf69b 100644 --- a/moses/FF/LexicalReordering/LexicalReorderingState.h +++ b/moses/FF/LexicalReordering/LexicalReorderingState.h @@ -132,6 +132,8 @@ class LexicalReorderingState : public FFState static const ReorderingType DR = 3; // discontinuous, right static const ReorderingType R = 0; // right static const ReorderingType L = 1; // left + public: + static const ReorderingType MAX = 3; //largest possible }; //! @todo what is this? 
diff --git a/moses/FF/LexicalReordering/SparseReordering.cpp b/moses/FF/LexicalReordering/SparseReordering.cpp index 677698400f..f1a334ece8 100644 --- a/moses/FF/LexicalReordering/SparseReordering.cpp +++ b/moses/FF/LexicalReordering/SparseReordering.cpp @@ -14,6 +14,40 @@ using namespace std; namespace Moses { +const std::string& SparseReorderingFeatureKey::Name(const string& wordListId) { + static string kSep = "-"; + static string name; + ostringstream buf; + // type side position id word reotype + if (type == Phrase) { + buf << "phr"; + } else if (type == Stack) { + buf << "stk"; + } else if (type == Between) { + buf << "btn"; + } + buf << kSep; + if (side == Source) { + buf << "src"; + } else if (side == Target) { + buf << "tgt"; + } + buf << kSep; + if (position == First) { + buf << "first"; + } else if (position == Last) { + buf << "last"; + } + buf << kSep; + buf << wordListId; + buf << kSep; + buf << word->GetString(); + buf << kSep; + buf << reoType; + name = buf.str(); + return name; +} + SparseReordering::SparseReordering(const map& config, const LexicalReordering* producer) : m_producer(producer) { @@ -24,9 +58,9 @@ SparseReordering::SparseReordering(const map& config, const Lexic if (fields[0] == "words") { UTIL_THROW_IF(!(fields.size() == 3), util::Exception, "Sparse reordering word list name should be sparse-words-(source|target)-"); if (fields[1] == kSource) { - ReadWordList(i->second,fields[2],&m_sourceWordLists); + ReadWordList(i->second,fields[2], SparseReorderingFeatureKey::Source, &m_sourceWordLists); } else if (fields[1] == kTarget) { - ReadWordList(i->second,fields[2],&m_targetWordLists); + ReadWordList(i->second,fields[2],SparseReorderingFeatureKey::Target, &m_targetWordLists); } else { UTIL_THROW(util::Exception, "Sparse reordering requires source or target, not " << fields[1]); } @@ -42,9 +76,10 @@ SparseReordering::SparseReordering(const map& config, const Lexic UTIL_THROW(util::Exception, "Unable to parse sparse reordering option: " << 
i->first); } } + } -void SparseReordering::ReadWordList(const string& filename, const string& id, vector* pWordLists) { +void SparseReordering::ReadWordList(const string& filename, const string& id, SparseReorderingFeatureKey::Side side, vector* pWordLists) { ifstream fh(filename.c_str()); UTIL_THROW_IF(!fh, util::Exception, "Unable to open: " << filename); string line; @@ -54,21 +89,37 @@ void SparseReordering::ReadWordList(const string& filename, const string& id, ve //TODO: StringPiece const Factor* factor = FactorCollection::Instance().AddFactor(line); pWordLists->back().second.insert(factor); + //Pre-calculate feature names. + for (size_t type = SparseReorderingFeatureKey::Stack; + type <= SparseReorderingFeatureKey::Between; ++type) { + for (size_t position = SparseReorderingFeatureKey::First; + position <= SparseReorderingFeatureKey::Last; ++position) { + for (int reoType = 0; reoType < LexicalReorderingState::MAX; ++reoType) { + SparseReorderingFeatureKey key( + pWordLists->size()-1, static_cast(type), + factor, static_cast(position), side, reoType); + m_featureMap[key] = key.Name(id); + } + } + } + } } -void SparseReordering::AddFeatures( - const string& type, const Word& word, const string& position, const WordList& words, - LexicalReorderingState::ReorderingType reoType, +void SparseReordering::AddFeatures(size_t id, + SparseReorderingFeatureKey::Type type, SparseReorderingFeatureKey::Side side, + const Word& word, SparseReorderingFeatureKey::Position position, + const WordList& words, LexicalReorderingState::ReorderingType reoType, ScoreComponentCollection* scores) const { //TODO: Precalculate all feature names static string kSep = "-"; const Factor* wordFactor = word.GetFactor(0); if (words.second.find(wordFactor) == words.second.end()) return; - ostringstream buf; - buf << type << kSep << position << kSep << words.first << kSep << wordFactor->GetString() << kSep << reoType; - scores->PlusEquals(m_producer, buf.str(), 1.0); + SparseReorderingFeatureKey 
key(id, type, wordFactor, position, side, reoType); + FeatureMap::const_iterator fmi = m_featureMap.find(key); + assert(fmi != m_featureMap.end()); + scores->PlusEquals(m_producer, fmi->second, 1.0); } @@ -79,29 +130,34 @@ void SparseReordering::CopyScores( ScoreComponentCollection* scores) const { //std::cerr << "SR " << topt << " " << reoType << " " << direction << std::endl; - const string kPhrase = "phr"; //phrase (backward) - const string kStack = "stk"; //stack (forward) - - const string* type = NULL;// &kPhrase; + //phrase (backward) + //stack (forward) + SparseReorderingFeatureKey::Type type; if (direction == LexicalReorderingConfiguration::Forward) { if (!m_useStack) return; - type = &kStack; + type = SparseReorderingFeatureKey::Stack; } else if (direction == LexicalReorderingConfiguration::Backward) { if (!m_usePhrase) return; - type = &kPhrase; + type = SparseReorderingFeatureKey::Phrase; } else { //Shouldn't be called for bidirectional + //keep compiler happy + type = SparseReorderingFeatureKey::Phrase; assert(!"Shouldn't call CopyScores() with bidirectional direction"); } - for (vector::const_iterator i = m_sourceWordLists.begin(); i != m_sourceWordLists.end(); ++i) { + for (size_t i = 0; i < m_sourceWordLists.size(); ++i) { const Phrase& sourcePhrase = topt.GetInputPath().GetPhrase(); - AddFeatures(*type, sourcePhrase.GetWord(0), "src.first", *i, reoType, scores); - AddFeatures(*type, sourcePhrase.GetWord(sourcePhrase.GetSize()-1), "src.last", *i, reoType, scores); + AddFeatures(i, type, SparseReorderingFeatureKey::Source, sourcePhrase.GetWord(0), + SparseReorderingFeatureKey::First, m_sourceWordLists[i], reoType, scores); + AddFeatures(i, type, SparseReorderingFeatureKey::Source, sourcePhrase.GetWord(sourcePhrase.GetSize()-1), + SparseReorderingFeatureKey::Last, m_sourceWordLists[i], reoType, scores); } - for (vector::const_iterator i = m_targetWordLists.begin(); i != m_targetWordLists.end(); ++i) { + for (size_t i = 0; i < m_sourceWordLists.size(); 
++i) { const Phrase& targetPhrase = topt.GetTargetPhrase(); - AddFeatures(*type, targetPhrase.GetWord(0), "tgt.first", *i, reoType, scores); - AddFeatures(*type, targetPhrase.GetWord(targetPhrase.GetSize()-1), "tgt.last", *i, reoType, scores); + AddFeatures(i, type, SparseReorderingFeatureKey::Target, targetPhrase.GetWord(0), + SparseReorderingFeatureKey::First, m_targetWordLists[i], reoType, scores); + AddFeatures(i, type, SparseReorderingFeatureKey::Target, targetPhrase.GetWord(targetPhrase.GetSize()-1), + SparseReorderingFeatureKey::Last, m_targetWordLists[i], reoType, scores); } diff --git a/moses/FF/LexicalReordering/SparseReordering.h b/moses/FF/LexicalReordering/SparseReordering.h index ec6c3c04e3..76c72a2019 100644 --- a/moses/FF/LexicalReordering/SparseReordering.h +++ b/moses/FF/LexicalReordering/SparseReordering.h @@ -6,12 +6,17 @@ **/ +#include #include #include #include #include +#include "util/murmur_hash.hh" +#include "util/pool.hh" +#include "util/string_piece.hh" + #include "moses/ScoreComponentCollection.h" #include "LexicalReorderingState.h" @@ -31,6 +36,51 @@ namespace Moses { + +/** + * Used to store pre-calculated feature names. +**/ +struct SparseReorderingFeatureKey { + size_t id; + enum Type {Stack, Phrase, Between} type; + const Factor* word; + enum Position {First, Last} position; + enum Side {Source, Target} side; + LexicalReorderingState::ReorderingType reoType; + + SparseReorderingFeatureKey(size_t id_, Type type_, const Factor* word_, Position position_, + Side side_, LexicalReorderingState::ReorderingType reoType_) + : id(id_), type(type_), word(word_), position(position_), side(side_), reoType(reoType_) + {} + + const std::string& Name(const std::string& wordListId) ; +}; + +struct HashSparseReorderingFeatureKey : public std::unary_function { + std::size_t operator()(const SparseReorderingFeatureKey& key) const { + //TODO: can we just hash the memory? 
+ //not sure, there could be random padding + std::size_t seed = 0; + seed = util::MurmurHashNative(&key.id, sizeof(key.id), seed); + seed = util::MurmurHashNative(&key.type, sizeof(key.type), seed); + seed = util::MurmurHashNative(&key.word, sizeof(key.word), seed); + seed = util::MurmurHashNative(&key.position, sizeof(key.position), seed); + seed = util::MurmurHashNative(&key.side, sizeof(key.side), seed); + seed = util::MurmurHashNative(&key.reoType, sizeof(key.reoType), seed); + return seed; + } +}; + +struct EqualsSparseReorderingFeatureKey : + public std::binary_function { + bool operator()(const SparseReorderingFeatureKey& left, const SparseReorderingFeatureKey& right) const { + //TODO: Can we just compare the memory? + return left.id == right.id && left.type == right.type && left.word == right.word && + left.position == right.position && left.side == right.side && + left.reoType == right.reoType; + } +}; + class SparseReordering { public: @@ -50,11 +100,15 @@ class SparseReordering bool m_usePhrase; bool m_useBetween; bool m_useStack; + typedef boost::unordered_map FeatureMap; + FeatureMap m_featureMap; - void ReadWordList(const std::string& filename, const std::string& id, std::vector* pWordLists); - void AddFeatures( - const std::string& type, const Word& word, const std::string& position, const WordList& words, - LexicalReorderingState::ReorderingType reoType, + void ReadWordList(const std::string& filename, const std::string& id, + SparseReorderingFeatureKey::Side side, std::vector* pWordLists); + void AddFeatures(size_t id, + SparseReorderingFeatureKey::Type type, SparseReorderingFeatureKey::Side side, + const Word& word, SparseReorderingFeatureKey::Position position, + const WordList& words, LexicalReorderingState::ReorderingType reoType, ScoreComponentCollection* scores) const; }; From 1afa0bc8f6c463e163f4d35d2bd5586f6397d883 Mon Sep 17 00:00:00 2001 From: Barry Haddow Date: Mon, 23 Jun 2014 17:06:00 +0100 Subject: [PATCH 24/84] Fix for hreo --- 
moses/FF/LexicalReordering/SparseReordering.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/moses/FF/LexicalReordering/SparseReordering.cpp b/moses/FF/LexicalReordering/SparseReordering.cpp index f1a334ece8..8a61f52151 100644 --- a/moses/FF/LexicalReordering/SparseReordering.cpp +++ b/moses/FF/LexicalReordering/SparseReordering.cpp @@ -94,7 +94,7 @@ void SparseReordering::ReadWordList(const string& filename, const string& id, Sp type <= SparseReorderingFeatureKey::Between; ++type) { for (size_t position = SparseReorderingFeatureKey::First; position <= SparseReorderingFeatureKey::Last; ++position) { - for (int reoType = 0; reoType < LexicalReorderingState::MAX; ++reoType) { + for (int reoType = 0; reoType <= LexicalReorderingState::MAX; ++reoType) { SparseReorderingFeatureKey key( pWordLists->size()-1, static_cast(type), factor, static_cast(position), side, reoType); @@ -112,7 +112,6 @@ void SparseReordering::AddFeatures(size_t id, const WordList& words, LexicalReorderingState::ReorderingType reoType, ScoreComponentCollection* scores) const { - //TODO: Precalculate all feature names static string kSep = "-"; const Factor* wordFactor = word.GetFactor(0); if (words.second.find(wordFactor) == words.second.end()) return; From cc426190e2d11c93eb5cf6da5219a9ae39cc97c5 Mon Sep 17 00:00:00 2001 From: Barry Haddow Date: Mon, 23 Jun 2014 17:46:45 +0100 Subject: [PATCH 25/84] Minor fix --- moses/FF/LexicalReordering/SparseReordering.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/moses/FF/LexicalReordering/SparseReordering.cpp b/moses/FF/LexicalReordering/SparseReordering.cpp index 8a61f52151..426aad1830 100644 --- a/moses/FF/LexicalReordering/SparseReordering.cpp +++ b/moses/FF/LexicalReordering/SparseReordering.cpp @@ -112,7 +112,6 @@ void SparseReordering::AddFeatures(size_t id, const WordList& words, LexicalReorderingState::ReorderingType reoType, ScoreComponentCollection* scores) const { - static string kSep = "-"; const 
Factor* wordFactor = word.GetFactor(0); if (words.second.find(wordFactor) == words.second.end()) return; SparseReorderingFeatureKey key(id, type, wordFactor, position, side, reoType); @@ -151,7 +150,7 @@ void SparseReordering::CopyScores( AddFeatures(i, type, SparseReorderingFeatureKey::Source, sourcePhrase.GetWord(sourcePhrase.GetSize()-1), SparseReorderingFeatureKey::Last, m_sourceWordLists[i], reoType, scores); } - for (size_t i = 0; i < m_sourceWordLists.size(); ++i) { + for (size_t i = 0; i < m_targetWordLists.size(); ++i) { const Phrase& targetPhrase = topt.GetTargetPhrase(); AddFeatures(i, type, SparseReorderingFeatureKey::Target, targetPhrase.GetWord(0), SparseReorderingFeatureKey::First, m_targetWordLists[i], reoType, scores); From 69222ee32bc1d8086c7a196150019ee5bbc1b447 Mon Sep 17 00:00:00 2001 From: Barry Haddow Date: Tue, 24 Jun 2014 13:37:54 +0100 Subject: [PATCH 26/84] Implement 'between' features. --- .../LexicalReordering/LexicalReordering.cpp | 2 +- .../LexicalReorderingState.cpp | 22 +++++------ .../LexicalReorderingState.h | 14 +++---- .../FF/LexicalReordering/SparseReordering.cpp | 37 +++++++++++++++++-- moses/FF/LexicalReordering/SparseReordering.h | 6 ++- 5 files changed, 57 insertions(+), 24 deletions(-) diff --git a/moses/FF/LexicalReordering/LexicalReordering.cpp b/moses/FF/LexicalReordering/LexicalReordering.cpp index 4278af1a51..b0b18c65fa 100644 --- a/moses/FF/LexicalReordering/LexicalReordering.cpp +++ b/moses/FF/LexicalReordering/LexicalReordering.cpp @@ -75,7 +75,7 @@ FFState* LexicalReordering::Evaluate(const Hypothesis& hypo, { Scores score(GetNumScoreComponents(), 0); const LexicalReorderingState *prev = dynamic_cast(prev_state); - LexicalReorderingState *next_state = prev->Expand(hypo.GetTranslationOption(), out); + LexicalReorderingState *next_state = prev->Expand(hypo.GetTranslationOption(), hypo.GetInput(), out); out->PlusEquals(this, score); diff --git a/moses/FF/LexicalReordering/LexicalReorderingState.cpp
b/moses/FF/LexicalReordering/LexicalReorderingState.cpp index dfdc0ddb8f..fa88fdeab0 100644 --- a/moses/FF/LexicalReordering/LexicalReorderingState.cpp +++ b/moses/FF/LexicalReordering/LexicalReorderingState.cpp @@ -130,7 +130,7 @@ LexicalReorderingState *LexicalReorderingConfiguration::CreateLexicalReorderingS return new BidirectionalReorderingState(*this, bwd, fwd, 0); } -void LexicalReorderingState::CopyScores(ScoreComponentCollection* accum, const TranslationOption &topt, ReorderingType reoType) const +void LexicalReorderingState::CopyScores(ScoreComponentCollection* accum, const TranslationOption &topt, const InputType& input, ReorderingType reoType) const { // don't call this on a bidirectional object UTIL_THROW_IF2(m_direction != LexicalReorderingConfiguration::Backward && m_direction != LexicalReorderingConfiguration::Forward, @@ -153,7 +153,7 @@ void LexicalReorderingState::CopyScores(ScoreComponentCollection* accum, const } const SparseReordering* sparse = m_configuration.GetSparseReordering(); - if (sparse) sparse->CopyScores(*relevantOpt, reoType, m_direction, accum); + if (sparse) sparse->CopyScores(*relevantOpt, m_prevOption, input, reoType, m_direction, accum); } @@ -210,7 +210,7 @@ int PhraseBasedReorderingState::Compare(const FFState& o) const return 1; } -LexicalReorderingState* PhraseBasedReorderingState::Expand(const TranslationOption& topt, ScoreComponentCollection* scores) const +LexicalReorderingState* PhraseBasedReorderingState::Expand(const TranslationOption& topt, const InputType& input,ScoreComponentCollection* scores) const { ReorderingType reoType; const WordsRange currWordsRange = topt.GetSourceWordsRange(); @@ -226,7 +226,7 @@ LexicalReorderingState* PhraseBasedReorderingState::Expand(const TranslationOpti } else { reoType = GetOrientationTypeLeftRight(currWordsRange); } - CopyScores(scores, topt, reoType); + CopyScores(scores, topt, input, reoType); } return new PhraseBasedReorderingState(this, topt); @@ -304,10 +304,10 @@ int 
BidirectionalReorderingState::Compare(const FFState& o) const return m_forward->Compare(*other.m_forward); } -LexicalReorderingState* BidirectionalReorderingState::Expand(const TranslationOption& topt, ScoreComponentCollection* scores) const +LexicalReorderingState* BidirectionalReorderingState::Expand(const TranslationOption& topt, const InputType& input, ScoreComponentCollection* scores) const { - LexicalReorderingState *newbwd = m_backward->Expand(topt, scores); - LexicalReorderingState *newfwd = m_forward->Expand(topt, scores); + LexicalReorderingState *newbwd = m_backward->Expand(topt,input, scores); + LexicalReorderingState *newfwd = m_forward->Expand(topt, input, scores); return new BidirectionalReorderingState(m_configuration, newbwd, newfwd, m_offset); } @@ -328,7 +328,7 @@ int HierarchicalReorderingBackwardState::Compare(const FFState& o) const return m_reoStack.Compare(other.m_reoStack); } -LexicalReorderingState* HierarchicalReorderingBackwardState::Expand(const TranslationOption& topt, ScoreComponentCollection* scores) const +LexicalReorderingState* HierarchicalReorderingBackwardState::Expand(const TranslationOption& topt, const InputType& input,ScoreComponentCollection* scores) const { HierarchicalReorderingBackwardState* nextState = new HierarchicalReorderingBackwardState(this, topt, m_reoStack); @@ -347,7 +347,7 @@ LexicalReorderingState* HierarchicalReorderingBackwardState::Expand(const Transl reoType = GetOrientationTypeMonotonic(reoDistance); } - CopyScores(scores, topt, reoType); + CopyScores(scores, topt, input, reoType); return nextState; } @@ -431,7 +431,7 @@ int HierarchicalReorderingForwardState::Compare(const FFState& o) const // dright: if the next phrase follows the conditioning phrase and other stuff comes in between // dleft: if the next phrase precedes the conditioning phrase and other stuff comes in between -LexicalReorderingState* HierarchicalReorderingForwardState::Expand(const TranslationOption& topt, ScoreComponentCollection* 
scores) const +LexicalReorderingState* HierarchicalReorderingForwardState::Expand(const TranslationOption& topt, const InputType& input,ScoreComponentCollection* scores) const { const LexicalReorderingConfiguration::ModelType modelType = m_configuration.GetModelType(); const WordsRange currWordsRange = topt.GetSourceWordsRange(); @@ -454,7 +454,7 @@ LexicalReorderingState* HierarchicalReorderingForwardState::Expand(const Transla reoType = GetOrientationTypeLeftRight(currWordsRange, coverage); } - CopyScores(scores, topt, reoType); + CopyScores(scores, topt, input, reoType); } return new HierarchicalReorderingForwardState(this, topt); diff --git a/moses/FF/LexicalReordering/LexicalReorderingState.h b/moses/FF/LexicalReordering/LexicalReorderingState.h index cf91eaf69b..e309ed7f13 100644 --- a/moses/FF/LexicalReordering/LexicalReorderingState.h +++ b/moses/FF/LexicalReordering/LexicalReorderingState.h @@ -96,7 +96,7 @@ class LexicalReorderingState : public FFState { public: virtual int Compare(const FFState& o) const = 0; - virtual LexicalReorderingState* Expand(const TranslationOption& hypo, ScoreComponentCollection* scores) const = 0; + virtual LexicalReorderingState* Expand(const TranslationOption& hypo, const InputType& input, ScoreComponentCollection* scores) const = 0; static LexicalReorderingState* CreateLexicalReorderingState(const std::vector& config, LexicalReorderingConfiguration::Direction dir, const InputType &input); @@ -120,10 +120,11 @@ class LexicalReorderingState : public FFState : m_configuration(config), m_direction(dir), m_offset(offset), m_prevOption(NULL) {} // copy the right scores in the right places, taking into account forward/backward, offset, collapse - void CopyScores(ScoreComponentCollection* scores, const TranslationOption& topt, ReorderingType reoType) const; + void CopyScores(ScoreComponentCollection* scores, const TranslationOption& topt, const InputType& input, ReorderingType reoType) const; int ComparePrevScores(const 
TranslationOption *other) const; //constants for the different type of reorderings (corresponding to indexes in the table file) + public: static const ReorderingType M = 0; // monotonic static const ReorderingType NM = 1; // non-monotonic static const ReorderingType S = 1; // swap @@ -132,7 +133,6 @@ class LexicalReorderingState : public FFState static const ReorderingType DR = 3; // discontinuous, right static const ReorderingType R = 0; // right static const ReorderingType L = 1; // left - public: static const ReorderingType MAX = 3; //largest possible }; @@ -152,7 +152,7 @@ class BidirectionalReorderingState : public LexicalReorderingState } virtual int Compare(const FFState& o) const; - virtual LexicalReorderingState* Expand(const TranslationOption& topt, ScoreComponentCollection* scores) const; + virtual LexicalReorderingState* Expand(const TranslationOption& topt, const InputType& input, ScoreComponentCollection* scores) const; }; //! State for the standard Moses implementation of lexical reordering models @@ -168,7 +168,7 @@ class PhraseBasedReorderingState : public LexicalReorderingState PhraseBasedReorderingState(const PhraseBasedReorderingState *prev, const TranslationOption &topt); virtual int Compare(const FFState& o) const; - virtual LexicalReorderingState* Expand(const TranslationOption& topt, ScoreComponentCollection* scores) const; + virtual LexicalReorderingState* Expand(const TranslationOption& topt,const InputType& input, ScoreComponentCollection* scores) const; ReorderingType GetOrientationTypeMSD(WordsRange currRange) const; ReorderingType GetOrientationTypeMSLR(WordsRange currRange) const; @@ -189,7 +189,7 @@ class HierarchicalReorderingBackwardState : public LexicalReorderingState const TranslationOption &topt, ReorderingStack reoStack); virtual int Compare(const FFState& o) const; - virtual LexicalReorderingState* Expand(const TranslationOption& hypo, ScoreComponentCollection* scores) const; + virtual LexicalReorderingState* Expand(const 
TranslationOption& hypo, const InputType& input, ScoreComponentCollection* scores) const; private: ReorderingType GetOrientationTypeMSD(int reoDistance) const; @@ -212,7 +212,7 @@ class HierarchicalReorderingForwardState : public LexicalReorderingState HierarchicalReorderingForwardState(const HierarchicalReorderingForwardState *prev, const TranslationOption &topt); virtual int Compare(const FFState& o) const; - virtual LexicalReorderingState* Expand(const TranslationOption& hypo, ScoreComponentCollection* scores) const; + virtual LexicalReorderingState* Expand(const TranslationOption& hypo, const InputType& input, ScoreComponentCollection* scores) const; private: ReorderingType GetOrientationTypeMSD(WordsRange currRange, WordsBitmap coverage) const; diff --git a/moses/FF/LexicalReordering/SparseReordering.cpp b/moses/FF/LexicalReordering/SparseReordering.cpp index 426aad1830..21dbf0eb7f 100644 --- a/moses/FF/LexicalReordering/SparseReordering.cpp +++ b/moses/FF/LexicalReordering/SparseReordering.cpp @@ -122,11 +122,42 @@ void SparseReordering::AddFeatures(size_t id, } void SparseReordering::CopyScores( - const TranslationOption& topt, + const TranslationOption& currentOpt, + const TranslationOption* previousOpt, + const InputType& input, LexicalReorderingState::ReorderingType reoType, LexicalReorderingConfiguration::Direction direction, ScoreComponentCollection* scores) const { + if (m_useBetween && direction == LexicalReorderingConfiguration::Backward && + (reoType == LexicalReorderingState::D || reoType == LexicalReorderingState::DL || + reoType == LexicalReorderingState::DR)) { + size_t gapStart, gapEnd; + const Sentence& sentence = dynamic_cast(input); + const WordsRange& currentRange = currentOpt.GetSourceWordsRange(); + if (previousOpt) { + const WordsRange& previousRange = previousOpt->GetSourceWordsRange(); + if (previousRange < currentRange) { + gapStart = previousRange.GetEndPos() + 1; + gapEnd = currentRange.GetStartPos(); + } else { + gapStart = 
currentRange.GetEndPos() + 1; + gapEnd = previousRange.GetStartPos(); + } + } else { + //start of sentence + gapStart = 0; + gapEnd = currentRange.GetStartPos(); + } + assert(gapStart < gapEnd); + for (size_t i = gapStart; i < gapEnd; ++i) { + for (size_t j = 0; j < m_sourceWordLists.size(); ++j) { + AddFeatures(j, SparseReorderingFeatureKey::Between, + SparseReorderingFeatureKey::Source, sentence.GetWord(i), + SparseReorderingFeatureKey::First, m_sourceWordLists[j], reoType, scores); + } + } + } //std::cerr << "SR " << topt << " " << reoType << " " << direction << std::endl; //phrase (backward) //stack (forward) @@ -144,14 +175,14 @@ void SparseReordering::CopyScores( assert(!"Shouldn't call CopyScores() with bidirectional direction"); } for (size_t i = 0; i < m_sourceWordLists.size(); ++i) { - const Phrase& sourcePhrase = topt.GetInputPath().GetPhrase(); + const Phrase& sourcePhrase = currentOpt.GetInputPath().GetPhrase(); AddFeatures(i, type, SparseReorderingFeatureKey::Source, sourcePhrase.GetWord(0), SparseReorderingFeatureKey::First, m_sourceWordLists[i], reoType, scores); AddFeatures(i, type, SparseReorderingFeatureKey::Source, sourcePhrase.GetWord(sourcePhrase.GetSize()-1), SparseReorderingFeatureKey::Last, m_sourceWordLists[i], reoType, scores); } for (size_t i = 0; i < m_targetWordLists.size(); ++i) { - const Phrase& targetPhrase = topt.GetTargetPhrase(); + const Phrase& targetPhrase = currentOpt.GetTargetPhrase(); AddFeatures(i, type, SparseReorderingFeatureKey::Target, targetPhrase.GetWord(0), SparseReorderingFeatureKey::First, m_targetWordLists[i], reoType, scores); AddFeatures(i, type, SparseReorderingFeatureKey::Target, targetPhrase.GetWord(targetPhrase.GetSize()-1), diff --git a/moses/FF/LexicalReordering/SparseReordering.h b/moses/FF/LexicalReordering/SparseReordering.h index 76c72a2019..04d9e8eb79 100644 --- a/moses/FF/LexicalReordering/SparseReordering.h +++ b/moses/FF/LexicalReordering/SparseReordering.h @@ -86,8 +86,10 @@ class SparseReordering 
public: SparseReordering(const std::map& config, const LexicalReordering* producer); - //If direction is backward topt is the current option, otherwise the previous - void CopyScores(const TranslationOption& topt, + //If direction is backward the options will be different, for forward they will be the same + void CopyScores(const TranslationOption& currentOpt, + const TranslationOption* previousOpt, + const InputType& input, LexicalReorderingState::ReorderingType reoType, LexicalReorderingConfiguration::Direction direction, ScoreComponentCollection* scores) const ; From 961d72269aa6c4a4389761d9d8ad93af3530cf92 Mon Sep 17 00:00:00 2001 From: Barry Haddow Date: Tue, 24 Jun 2014 21:50:20 +0100 Subject: [PATCH 27/84] clusters for sparse reordering feature --- .../FF/LexicalReordering/SparseReordering.cpp | 132 ++++++++++++------ moses/FF/LexicalReordering/SparseReordering.h | 19 ++- 2 files changed, 107 insertions(+), 44 deletions(-) diff --git a/moses/FF/LexicalReordering/SparseReordering.cpp b/moses/FF/LexicalReordering/SparseReordering.cpp index 21dbf0eb7f..3d8f56d16a 100644 --- a/moses/FF/LexicalReordering/SparseReordering.cpp +++ b/moses/FF/LexicalReordering/SparseReordering.cpp @@ -3,8 +3,13 @@ #include "moses/FactorCollection.h" #include "moses/InputPath.h" #include "moses/Util.h" + #include "util/exception.hh" +#include "util/file_piece.hh" +#include "util/string_piece.hh" +#include "util/tokenize_piece.hh" + #include "LexicalReordering.h" #include "SparseReordering.h" @@ -41,6 +46,7 @@ const std::string& SparseReorderingFeatureKey::Name(const string& wordListId) { buf << kSep; buf << wordListId; buf << kSep; + if (isCluster) buf << "cluster_"; buf << word->GetString(); buf << kSep; buf << reoType; @@ -65,7 +71,15 @@ SparseReordering::SparseReordering(const map& config, const Lexic UTIL_THROW(util::Exception, "Sparse reordering requires source or target, not " << fields[1]); } } else if (fields[0] == "clusters") { - UTIL_THROW(util::Exception, "Sparse 
reordering does not yet support clusters" << i->first); + UTIL_THROW_IF(!(fields.size() == 3), util::Exception, "Sparse reordering cluster name should be sparse-clusters-(source|target)-"); + if (fields[1] == kSource) { + ReadClusterMap(i->second,fields[2], SparseReorderingFeatureKey::Source, &m_sourceClusterMaps); + } else if (fields[1] == kTarget) { + ReadClusterMap(i->second,fields[2],SparseReorderingFeatureKey::Target, &m_targetClusterMaps); + } else { + UTIL_THROW(util::Exception, "Sparse reordering requires source or target, not " << fields[1]); + } + } else if (fields[0] == "phrase") { m_usePhrase = true; } else if (fields[0] == "stack") { @@ -79,6 +93,21 @@ SparseReordering::SparseReordering(const map& config, const Lexic } +void SparseReordering::PreCalculateFeatureNames(size_t index, const string& id, SparseReorderingFeatureKey::Side side, const Factor* factor, bool isCluster) { + for (size_t type = SparseReorderingFeatureKey::Stack; + type <= SparseReorderingFeatureKey::Between; ++type) { + for (size_t position = SparseReorderingFeatureKey::First; + position <= SparseReorderingFeatureKey::Last; ++position) { + for (int reoType = 0; reoType <= LexicalReorderingState::MAX; ++reoType) { + SparseReorderingFeatureKey key( + index, static_cast(type), factor, isCluster, + static_cast(position), side, reoType); + m_featureMap[key] = key.Name(id); + } + } + } +} + void SparseReordering::ReadWordList(const string& filename, const string& id, SparseReorderingFeatureKey::Side side, vector* pWordLists) { ifstream fh(filename.c_str()); UTIL_THROW_IF(!fh, util::Exception, "Unable to open: " << filename); @@ -89,35 +118,68 @@ void SparseReordering::ReadWordList(const string& filename, const string& id, Sp //TODO: StringPiece const Factor* factor = FactorCollection::Instance().AddFactor(line); pWordLists->back().second.insert(factor); - //Pre-calculate feature names. 
- for (size_t type = SparseReorderingFeatureKey::Stack; - type <= SparseReorderingFeatureKey::Between; ++type) { - for (size_t position = SparseReorderingFeatureKey::First; - position <= SparseReorderingFeatureKey::Last; ++position) { - for (int reoType = 0; reoType <= LexicalReorderingState::MAX; ++reoType) { - SparseReorderingFeatureKey key( - pWordLists->size()-1, static_cast(type), - factor, static_cast(position), side, reoType); - m_featureMap[key] = key.Name(id); - } - } - } + PreCalculateFeatureNames(pWordLists->size()-1, id, side, factor, false); } } -void SparseReordering::AddFeatures(size_t id, +void SparseReordering::ReadClusterMap(const string& filename, const string& id, SparseReorderingFeatureKey::Side side, vector* pClusterMaps) { + pClusterMaps->push_back(ClusterMap()); + pClusterMaps->back().first = id; + util::FilePiece file(filename.c_str()); + StringPiece line; + while (true) { + try { + line = file.ReadLine(); + } catch (const util::EndOfFileException &e) { + break; + } + util::TokenIter lineIter(line,util::SingleCharacter('\t')); + const Factor* wordFactor = FactorCollection::Instance().AddFactor(*lineIter); + ++lineIter; + const Factor* idFactor = FactorCollection::Instance().AddFactor(*lineIter); + pClusterMaps->back().second[wordFactor] = idFactor; + PreCalculateFeatureNames(pClusterMaps->size()-1, id, side, idFactor, true); + } +} + +void SparseReordering::AddFeatures( SparseReorderingFeatureKey::Type type, SparseReorderingFeatureKey::Side side, const Word& word, SparseReorderingFeatureKey::Position position, - const WordList& words, LexicalReorderingState::ReorderingType reoType, + LexicalReorderingState::ReorderingType reoType, ScoreComponentCollection* scores) const { const Factor* wordFactor = word.GetFactor(0); - if (words.second.find(wordFactor) == words.second.end()) return; - SparseReorderingFeatureKey key(id, type, wordFactor, position, side, reoType); - FeatureMap::const_iterator fmi = m_featureMap.find(key); - assert(fmi != 
m_featureMap.end()); - scores->PlusEquals(m_producer, fmi->second, 1.0); + + const vector* wordLists; + const vector* clusterMaps; + if (side == SparseReorderingFeatureKey::Source) { + wordLists = &m_sourceWordLists; + clusterMaps = &m_sourceClusterMaps; + } else { + wordLists = &m_targetWordLists; + clusterMaps = &m_targetClusterMaps; + } + + for (size_t id = 0; id < wordLists->size(); ++id) { + if ((*wordLists)[id].second.find(wordFactor) == (*wordLists)[id].second.end()) continue; + SparseReorderingFeatureKey key(id, type, wordFactor, false, position, side, reoType); + FeatureMap::const_iterator fmi = m_featureMap.find(key); + assert(fmi != m_featureMap.end()); + scores->PlusEquals(m_producer, fmi->second, 1.0); + } + + for (size_t id = 0; id < clusterMaps->size(); ++id) { + const ClusterMap& clusterMap = (*clusterMaps)[id]; + boost::unordered_map::const_iterator clusterIter + = clusterMap.second.find(wordFactor); + if (clusterIter != clusterMap.second.end()) { + SparseReorderingFeatureKey key(id, type, clusterIter->second, true, position, side, reoType); + FeatureMap::const_iterator fmi = m_featureMap.find(key); + assert(fmi != m_featureMap.end()); + scores->PlusEquals(m_producer, fmi->second, 1.0); + } + } } @@ -151,11 +213,9 @@ void SparseReordering::CopyScores( } assert(gapStart < gapEnd); for (size_t i = gapStart; i < gapEnd; ++i) { - for (size_t j = 0; j < m_sourceWordLists.size(); ++j) { - AddFeatures(j, SparseReorderingFeatureKey::Between, + AddFeatures(SparseReorderingFeatureKey::Between, SparseReorderingFeatureKey::Source, sentence.GetWord(i), - SparseReorderingFeatureKey::First, m_sourceWordLists[j], reoType, scores); - } + SparseReorderingFeatureKey::First, reoType, scores); } } //std::cerr << "SR " << topt << " " << reoType << " " << direction << std::endl; @@ -174,20 +234,14 @@ void SparseReordering::CopyScores( type = SparseReorderingFeatureKey::Phrase; assert(!"Shouldn't call CopyScores() with bidirectional direction"); } - for (size_t i = 0; i < 
m_sourceWordLists.size(); ++i) { - const Phrase& sourcePhrase = currentOpt.GetInputPath().GetPhrase(); - AddFeatures(i, type, SparseReorderingFeatureKey::Source, sourcePhrase.GetWord(0), - SparseReorderingFeatureKey::First, m_sourceWordLists[i], reoType, scores); - AddFeatures(i, type, SparseReorderingFeatureKey::Source, sourcePhrase.GetWord(sourcePhrase.GetSize()-1), - SparseReorderingFeatureKey::Last, m_sourceWordLists[i], reoType, scores); - } - for (size_t i = 0; i < m_targetWordLists.size(); ++i) { - const Phrase& targetPhrase = currentOpt.GetTargetPhrase(); - AddFeatures(i, type, SparseReorderingFeatureKey::Target, targetPhrase.GetWord(0), - SparseReorderingFeatureKey::First, m_targetWordLists[i], reoType, scores); - AddFeatures(i, type, SparseReorderingFeatureKey::Target, targetPhrase.GetWord(targetPhrase.GetSize()-1), - SparseReorderingFeatureKey::Last, m_targetWordLists[i], reoType, scores); - } + const Phrase& sourcePhrase = currentOpt.GetInputPath().GetPhrase(); + AddFeatures(type, SparseReorderingFeatureKey::Source, sourcePhrase.GetWord(0), + SparseReorderingFeatureKey::First, reoType, scores); + AddFeatures(type, SparseReorderingFeatureKey::Source, sourcePhrase.GetWord(sourcePhrase.GetSize()-1), SparseReorderingFeatureKey::Last, reoType, scores); + const Phrase& targetPhrase = currentOpt.GetTargetPhrase(); + AddFeatures(type, SparseReorderingFeatureKey::Target, targetPhrase.GetWord(0), + SparseReorderingFeatureKey::First, reoType, scores); + AddFeatures(type, SparseReorderingFeatureKey::Target, targetPhrase.GetWord(targetPhrase.GetSize()-1), SparseReorderingFeatureKey::Last, reoType, scores); } diff --git a/moses/FF/LexicalReordering/SparseReordering.h b/moses/FF/LexicalReordering/SparseReordering.h index 04d9e8eb79..e496daf942 100644 --- a/moses/FF/LexicalReordering/SparseReordering.h +++ b/moses/FF/LexicalReordering/SparseReordering.h @@ -44,13 +44,15 @@ struct SparseReorderingFeatureKey { size_t id; enum Type {Stack, Phrase, Between} type; const 
Factor* word; + bool isCluster; enum Position {First, Last} position; enum Side {Source, Target} side; LexicalReorderingState::ReorderingType reoType; - SparseReorderingFeatureKey(size_t id_, Type type_, const Factor* word_, Position position_, - Side side_, LexicalReorderingState::ReorderingType reoType_) - : id(id_), type(type_), word(word_), position(position_), side(side_), reoType(reoType_) + SparseReorderingFeatureKey(size_t id_, Type type_, const Factor* word_, bool isCluster_, + Position position_, Side side_, LexicalReorderingState::ReorderingType reoType_) + : id(id_), type(type_), word(word_), isCluster(isCluster_), + position(position_), side(side_), reoType(reoType_) {} const std::string& Name(const std::string& wordListId) ; @@ -64,6 +66,7 @@ struct HashSparseReorderingFeatureKey : public std::unary_function > WordList; //id and list std::vector m_sourceWordLists; std::vector m_targetWordLists; + typedef std::pair > ClusterMap; //id and map + std::vector m_sourceClusterMaps; + std::vector m_targetClusterMaps; bool m_usePhrase; bool m_useBetween; bool m_useStack; @@ -107,10 +113,13 @@ class SparseReordering void ReadWordList(const std::string& filename, const std::string& id, SparseReorderingFeatureKey::Side side, std::vector* pWordLists); - void AddFeatures(size_t id, + void ReadClusterMap(const std::string& filename, const std::string& id, SparseReorderingFeatureKey::Side side, std::vector* pClusterMaps); + void PreCalculateFeatureNames(size_t index, const std::string& id, SparseReorderingFeatureKey::Side side, const Factor* factor, bool isCluster); + + void AddFeatures( SparseReorderingFeatureKey::Type type, SparseReorderingFeatureKey::Side side, const Word& word, SparseReorderingFeatureKey::Position position, - const WordList& words, LexicalReorderingState::ReorderingType reoType, + LexicalReorderingState::ReorderingType reoType, ScoreComponentCollection* scores) const; }; From 556e1123660eb551de74418e89245a008f1fe0f5 Mon Sep 17 00:00:00 2001 From: 
Ulrich Germann Date: Mon, 30 Jun 2014 00:32:11 +0100 Subject: [PATCH 28/84] Major bug fix in Mmsapt.combine_pstats. --- moses/TranslationModel/UG/mmsapt.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/moses/TranslationModel/UG/mmsapt.cpp b/moses/TranslationModel/UG/mmsapt.cpp index 65a7a06adf..dc99454728 100644 --- a/moses/TranslationModel/UG/mmsapt.cpp +++ b/moses/TranslationModel/UG/mmsapt.cpp @@ -576,9 +576,9 @@ namespace Moses else pool.update(a->first,a->second); BOOST_FOREACH(sptr const& ff, m_active_ff_dyn) (*ff)(btb,pool,&ppfix.fvals); + if (ppfix.p2) + tpcoll->Add(createTargetPhrase(src,bta,ppfix)); } - if (ppfix.p2) - tpcoll->Add(createTargetPhrase(src,bta,ppfix)); } return (statsa || statsb); } From c4ca243b7aa265c4e2da341671eba6d90c20509d Mon Sep 17 00:00:00 2001 From: Barry Haddow Date: Mon, 30 Jun 2014 12:13:33 +0100 Subject: [PATCH 29/84] Improved debug for sparse reordering --- moses/FF/LexicalReordering/LexicalReordering.cpp | 8 ++++---- moses/FF/LexicalReordering/SparseReordering.cpp | 2 ++ 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/moses/FF/LexicalReordering/LexicalReordering.cpp b/moses/FF/LexicalReordering/LexicalReordering.cpp index b0b18c65fa..d692336c9b 100644 --- a/moses/FF/LexicalReordering/LexicalReordering.cpp +++ b/moses/FF/LexicalReordering/LexicalReordering.cpp @@ -31,7 +31,7 @@ LexicalReordering::LexicalReordering(const std::string &line) } else if (args[0].substr(0,7) == "sparse-") { sparseArgs[args[0].substr(7)] = args[1]; } else { - throw "Unknown argument " + args[0]; + UTIL_THROW(util::Exception,"Unknown argument " + args[0]); } } @@ -39,17 +39,17 @@ LexicalReordering::LexicalReordering(const std::string &line) case LexicalReorderingConfiguration::FE: case LexicalReorderingConfiguration::E: if(m_factorsE.empty()) { - throw "TL factor mask for lexical reordering is unexpectedly empty"; + UTIL_THROW(util::Exception,"TL factor mask for lexical reordering is unexpectedly empty"); } 
if(m_configuration->GetCondition() == LexicalReorderingConfiguration::E) break; // else fall through case LexicalReorderingConfiguration::F: if(m_factorsF.empty()) { - throw "SL factor mask for lexical reordering is unexpectedly empty"; + UTIL_THROW(util::Exception,"SL factor mask for lexical reordering is unexpectedly empty"); } break; default: - throw "Unknown conditioning option!"; + UTIL_THROW(util::Exception,"Unknown conditioning option!"); } m_configuration->ConfigureSparse(sparseArgs, this); diff --git a/moses/FF/LexicalReordering/SparseReordering.cpp b/moses/FF/LexicalReordering/SparseReordering.cpp index 3d8f56d16a..bc519eefcb 100644 --- a/moses/FF/LexicalReordering/SparseReordering.cpp +++ b/moses/FF/LexicalReordering/SparseReordering.cpp @@ -135,8 +135,10 @@ void SparseReordering::ReadClusterMap(const string& filename, const string& id, break; } util::TokenIter lineIter(line,util::SingleCharacter('\t')); + if (!lineIter) UTIL_THROW(util::Exception, "Malformed cluster line (missing word): '" << line << "'"); const Factor* wordFactor = FactorCollection::Instance().AddFactor(*lineIter); ++lineIter; + if (!lineIter) UTIL_THROW(util::Exception, "Malformed cluster line (missing cluster id): '" << line << "'"); const Factor* idFactor = FactorCollection::Instance().AddFactor(*lineIter); pClusterMaps->back().second[wordFactor] = idFactor; PreCalculateFeatureNames(pClusterMaps->size()-1, id, side, idFactor, true); From 17140e4ae70c201b017cb442c013c2003fd1969d Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 1 Jul 2014 13:01:51 -0400 Subject: [PATCH 30/84] eclipse --- contrib/other-builds/moses/.cproject | 5 +++- contrib/other-builds/moses/.project | 40 +++++++++++++++++++++------- 2 files changed, 34 insertions(+), 11 deletions(-) diff --git a/contrib/other-builds/moses/.cproject b/contrib/other-builds/moses/.cproject index 0d6abbb4f8..409adfcc57 100644 --- a/contrib/other-builds/moses/.cproject +++ b/contrib/other-builds/moses/.cproject @@ -80,8 +80,11 @@ + + 
+ - + diff --git a/contrib/other-builds/moses/.project b/contrib/other-builds/moses/.project index ff35ca5baa..1c22fca311 100644 --- a/contrib/other-builds/moses/.project +++ b/contrib/other-builds/moses/.project @@ -601,16 +601,6 @@ 1 PARENT-3-PROJECT_LOC/moses/ReorderingConstraint.h - - ReorderingStack.cpp - 1 - PARENT-3-PROJECT_LOC/moses/ReorderingStack.cpp - - - ReorderingStack.h - 1 - PARENT-3-PROJECT_LOC/moses/ReorderingStack.h - RuleCube.cpp 1 @@ -1331,6 +1321,16 @@ 1 PARENT-3-PROJECT_LOC/moses/FF/SpanLength.h + + FF/SparseHieroReorderingFeature.cpp + 1 + PARENT-3-PROJECT_LOC/moses/FF/SparseHieroReorderingFeature.cpp + + + FF/SparseHieroReorderingFeature.h + 1 + PARENT-3-PROJECT_LOC/moses/FF/SparseHieroReorderingFeature.h + FF/StatefulFeatureFunction.cpp 1 @@ -1916,6 +1916,26 @@ 1 PARENT-3-PROJECT_LOC/moses/FF/LexicalReordering/LexicalReorderingTable.h + + FF/LexicalReordering/ReorderingStack.cpp + 1 + PARENT-3-PROJECT_LOC/moses/FF/LexicalReordering/ReorderingStack.cpp + + + FF/LexicalReordering/ReorderingStack.h + 1 + PARENT-3-PROJECT_LOC/moses/FF/LexicalReordering/ReorderingStack.h + + + FF/LexicalReordering/SparseReordering.cpp + 1 + PARENT-3-PROJECT_LOC/moses/FF/LexicalReordering/SparseReordering.cpp + + + FF/LexicalReordering/SparseReordering.h + 1 + PARENT-3-PROJECT_LOC/moses/FF/LexicalReordering/SparseReordering.h + FF/OSM-Feature/OpSequenceModel.cpp 1 From bdf8d1a405936e9cfaf3fd295fa7c7f57e5362bb Mon Sep 17 00:00:00 2001 From: Rico Sennrich Date: Fri, 4 Jul 2014 10:28:24 +0100 Subject: [PATCH 31/84] CreateFromString no longer requires factorDelimiter --- moses/TranslationModel/PhraseDictionaryMultiModel.cpp | 5 +---- moses/TranslationModel/PhraseDictionaryMultiModelCounts.cpp | 5 +---- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/moses/TranslationModel/PhraseDictionaryMultiModel.cpp b/moses/TranslationModel/PhraseDictionaryMultiModel.cpp index 9f39965052..a1824b4751 100644 --- a/moses/TranslationModel/PhraseDictionaryMultiModel.cpp 
+++ b/moses/TranslationModel/PhraseDictionaryMultiModel.cpp @@ -323,9 +323,6 @@ void PhraseDictionaryMultiModel::SetTemporaryMultiModelWeightsVector(std::vector vector PhraseDictionaryMultiModel::MinimizePerplexity(vector > &phrase_pair_vector) { - const StaticData &staticData = StaticData::Instance(); - const string& factorDelimiter = staticData.GetFactorDelimiter(); - map, size_t> phrase_pair_map; for ( vector >::const_iterator iter = phrase_pair_vector.begin(); iter != phrase_pair_vector.end(); ++iter ) { @@ -344,7 +341,7 @@ vector PhraseDictionaryMultiModel::MinimizePerplexity(vector* allStats = new(map); Phrase sourcePhrase(0); - sourcePhrase.CreateFromString(Input, m_input, source_string, factorDelimiter, NULL); + sourcePhrase.CreateFromString(Input, m_input, source_string, NULL); CollectSufficientStatistics(sourcePhrase, allStats); //optimization potential: only call this once per source phrase diff --git a/moses/TranslationModel/PhraseDictionaryMultiModelCounts.cpp b/moses/TranslationModel/PhraseDictionaryMultiModelCounts.cpp index 99d3ad2567..83aa4a7186 100644 --- a/moses/TranslationModel/PhraseDictionaryMultiModelCounts.cpp +++ b/moses/TranslationModel/PhraseDictionaryMultiModelCounts.cpp @@ -489,9 +489,6 @@ void PhraseDictionaryMultiModelCounts::LoadLexicalTable( string &fileName, lexic vector PhraseDictionaryMultiModelCounts::MinimizePerplexity(vector > &phrase_pair_vector) { - const StaticData &staticData = StaticData::Instance(); - const string& factorDelimiter = staticData.GetFactorDelimiter(); - map, size_t> phrase_pair_map; for ( vector >::const_iterator iter = phrase_pair_vector.begin(); iter != phrase_pair_vector.end(); ++iter ) { @@ -510,7 +507,7 @@ vector PhraseDictionaryMultiModelCounts::MinimizePerplexity(vector* allStats = new(map); Phrase sourcePhrase(0); - sourcePhrase.CreateFromString(Input, m_input, source_string, factorDelimiter, NULL); + sourcePhrase.CreateFromString(Input, m_input, source_string, NULL); 
CollectSufficientStatistics(sourcePhrase, fs, allStats); //optimization potential: only call this once per source phrase From e1c9405b23e9ab9bc6821aa7ac4068748993baa6 Mon Sep 17 00:00:00 2001 From: Ulrich Germann Date: Wed, 9 Jul 2014 02:39:28 +0100 Subject: [PATCH 32/84] Utilities to check gain in phrase coverage by dynamic augmentation of the phrase table in a post-editing scenario. --- .../TranslationModel/UG/spe-check-coverage.cc | 214 ++++++++++++++++++ .../UG/spe-check-coverage2.cc | 76 +++++++ 2 files changed, 290 insertions(+) create mode 100644 moses/TranslationModel/UG/spe-check-coverage.cc create mode 100644 moses/TranslationModel/UG/spe-check-coverage2.cc diff --git a/moses/TranslationModel/UG/spe-check-coverage.cc b/moses/TranslationModel/UG/spe-check-coverage.cc new file mode 100644 index 0000000000..039b4cd371 --- /dev/null +++ b/moses/TranslationModel/UG/spe-check-coverage.cc @@ -0,0 +1,214 @@ +#include "mmsapt.h" +#include "moses/TranslationModel/PhraseDictionaryTreeAdaptor.h" +#include "moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.h" +#include +#include +#include +#include +#include +#include + +using namespace Moses; +using namespace bitext; +using namespace std; +using namespace boost; + +vector fo(1,FactorType(0)); + +class SimplePhrase : public Moses::Phrase +{ + vector const m_fo; // factor order +public: + SimplePhrase(): m_fo(1,FactorType(0)) {} + + void init(string const& s) + { + istringstream buf(s); string w; + while (buf >> w) + { + Word wrd; + this->AddWord().CreateFromString(Input,m_fo,StringPiece(w),false,false); + } + } +}; + +class TargetPhraseIndexSorter +{ + TargetPhraseCollection const& my_tpc; + CompareTargetPhrase cmp; +public: + TargetPhraseIndexSorter(TargetPhraseCollection const& tpc) : my_tpc(tpc) {} + bool operator()(size_t a, size_t b) const + { + // return cmp(*my_tpc[a], *my_tpc[b]); + return (my_tpc[a]->GetScoreBreakdown().GetWeightedScore() > + 
my_tpc[b]->GetScoreBreakdown().GetWeightedScore()); + } +}; + +int main(int argc, char* argv[]) +{ + + string vlevel = "alt"; // verbosity level + vector > argfilter(5); + argfilter[0] = std::make_pair(string("--spe-src"),1); + argfilter[1] = std::make_pair(string("--spe-trg"),1); + argfilter[2] = std::make_pair(string("--spe-aln"),1); + argfilter[3] = std::make_pair(string("--spe-show"),1); + + char** my_args; int my_acnt; + char** mo_args; int mo_acnt; + filter_arguments(argc, argv, mo_acnt, &mo_args, my_acnt, &my_args, argfilter); + + ifstream spe_src,spe_trg,spe_aln; + // instead of translating show coverage by phrase tables + for (int i = 0; i < my_acnt; i += 2) + { + if (!strcmp(my_args[i],"--spe-src")) + spe_src.open(my_args[i+1]); + else if (!strcmp(my_args[i],"--spe-trg")) + spe_trg.open(my_args[i+1]); + else if (!strcmp(my_args[i],"--spe-aln")) + spe_aln.open(my_args[i+1]); + else if (!strcmp(my_args[i],"--spe-show")) + vlevel = my_args[i+1]; + } + + Parameter params; + if (!params.LoadParam(mo_acnt,mo_args) || + !StaticData::LoadDataStatic(¶ms, mo_args[0])) + exit(1); + + StaticData const& global = StaticData::Instance(); + global.SetVerboseLevel(0); + vector ifo = global.GetInputFactorOrder(); + + PhraseDictionary* PT = PhraseDictionary::GetColl()[0]; + Mmsapt* mmsapt = dynamic_cast(PT); + if (!mmsapt) + { + cerr << "Phrase table implementation not supported by this utility." 
<< endl; + exit(1); + } + mmsapt->SetTableLimit(0); + + string srcline,trgline,alnline; + cout.precision(2); + vector fname = mmsapt->GetFeatureNames(); + while (getline(spe_src,srcline)) + { + UTIL_THROW_IF2(!getline(spe_trg,trgline), HERE + << ": missing data for online updates."); + UTIL_THROW_IF2(!getline(spe_aln,alnline), HERE + << ": missing data for online updates."); + cout << string(80,'-') << "\n" << srcline << "\n" << trgline << "\n" << endl; + + // cout << srcline << " " << HERE << endl; + Sentence snt; + istringstream buf(srcline+"\n"); + if (!snt.Read(buf,ifo)) break; + // cout << Phrase(snt) << endl; + int dynprovidx = -1; + for (size_t i = 0; i < fname.size(); ++i) + { + if (fname[i].substr(0,7) == "prov-1.") + dynprovidx = i; + } + cout << endl; + for (size_t i = 0; i < snt.GetSize(); ++i) + { + for (size_t k = i; k < snt.GetSize(); ++k) + { + Phrase p = snt.GetSubString(WordsRange(i,k)); + if (!mmsapt->PrefixExists(p)) break; + TargetPhraseCollection const* trg = PT->GetTargetPhraseCollectionLEGACY(p); + if (!trg || !trg->GetSize()) continue; + + bool header_done = false; + bool has_dynamic_match = vlevel == "all" || vlevel == "ALL"; + vector order; order.reserve(trg->GetSize()); + size_t stop = trg->GetSize(); + + vector o2(trg->GetSize()); + for (size_t i = 0; i < stop; ++i) o2[i] = i; + sort(o2.begin(),o2.end(),TargetPhraseIndexSorter(*trg)); + + for (size_t r = 0; r < stop; ++r) // r for rank + { + if (vlevel != "ALL") + { + Phrase const& phr = static_cast(*(*trg)[o2[r]]); + ostringstream buf; buf << phr; + string tphrase = buf.str(); + tphrase.erase(tphrase.size()-1); + size_t s = trgline.find(tphrase); + if (s == string::npos) continue; + size_t e = s + tphrase.size(); + if ((s && trgline[s-1] != ' ') || (e < trgline.size() && trgline[e] != ' ')) + continue; + } + order.push_back(r); + if (!has_dynamic_match) + { + ScoreComponentCollection const& scc = (*trg)[o2[r]]->GetScoreBreakdown(); + ScoreComponentCollection::IndexPair idx = 
scc.GetIndexes(PT); + FVector const& scores = scc.GetScoresVector(); + has_dynamic_match = scores[idx.first + dynprovidx] > 0; + } + } + if ((vlevel == "alt" || vlevel == "new") && !has_dynamic_match) + continue; + + + BOOST_FOREACH(size_t const& r, order) + { + ScoreComponentCollection const& scc = (*trg)[o2[r]]->GetScoreBreakdown(); + ScoreComponentCollection::IndexPair idx = scc.GetIndexes(PT); + FVector const& scores = scc.GetScoresVector(); + float wscore = scc.GetWeightedScore(); + if (vlevel == "new" && scores[idx.first + dynprovidx] == 0) + continue; + if (!header_done) + { + cout << endl; + if (trg->GetSize() == 1) + cout << p << " (1 translation option)" << endl; + else + cout << p << " (" << trg->GetSize() << " translation options)" << endl; + header_done = true; + } + Phrase const& phr = static_cast(*(*trg)[o2[r]]); + cout << setw(3) << r+1 << " " << phr << endl; + cout << " "; + BOOST_FOREACH(string const& fn, fname) + cout << " " << format("%10.10s") % fn; + cout << endl; + cout << " "; + for (size_t x = idx.first; x < idx.second; ++x) + { + size_t j = x-idx.first; + float f = (mmsapt && mmsapt->isLogVal(j)) ? exp(scores[x]) : scores[x]; + string fmt = (mmsapt && mmsapt->isInteger(j)) ? "%10d" : "%10.8f"; + if (fname[j].substr(0,3) == "lex") fmt = "%10.3e"; + if (fname[j].substr(0,7) == "prov-1.") + { + f = round(f/(1-f)); + fmt = "%10d"; + } + cout << " " << format(fmt) % (mmsapt->isInteger(j) ? 
round(f) : f); + } + cout << " " << format("%10.3e") % exp(wscore) + << " " << format("%10.3e") % exp((*trg)[o2[r]]->GetFutureScore()) << endl; + } + mmsapt->Release(trg); + continue; + } + } + mmsapt->add(srcline,trgline,alnline); + } + // } + exit(0); +} + + + diff --git a/moses/TranslationModel/UG/spe-check-coverage2.cc b/moses/TranslationModel/UG/spe-check-coverage2.cc new file mode 100644 index 0000000000..fa9ce1c85b --- /dev/null +++ b/moses/TranslationModel/UG/spe-check-coverage2.cc @@ -0,0 +1,76 @@ +#include "mmsapt.h" +#include "moses/TranslationModel/PhraseDictionaryTreeAdaptor.h" +#include "moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.h" +#include +#include +#include +#include +#include +#include + +using namespace Moses; +using namespace bitext; +using namespace std; +using namespace boost; + +typedef L2R_Token Token; +typedef mmBitext mmbitext; +typedef imBitext imbitext; +typedef Bitext::iter iter; + +mmbitext bg; + +void +show(ostream& out, iter& f) +{ + iter b(bg.I2.get(),f.getToken(0),f.size()); + if (b.size() == f.size()) + out << setw(12) << int(round(b.approxOccurrenceCount())); + else + out << string(12,' '); + out << " " << setw(5) << int(round(f.approxOccurrenceCount())) << " "; + out << f.str(bg.V1.get()) << endl; +} + + +void +dump(ostream& out, iter& f) +{ + float cnt = f.size() ? 
f.approxOccurrenceCount() : 0; + if (f.down()) + { + cnt = f.approxOccurrenceCount(); + do { dump(out,f); } + while (f.over()); + f.up(); + } + if (f.size() && cnt < f.approxOccurrenceCount() && f.approxOccurrenceCount() > 1) + show(out,f); +} + + +void +read_data(string fname, vector& dest) +{ + ifstream in(fname.c_str()); + string line; + while (getline(in,line)) dest.push_back(line); + in.close(); +} + +int main(int argc, char* argv[]) +{ + bg.open(argv[1],argv[2],argv[3]); + sptr fg(new imbitext(bg.V1,bg.V2)); + vector src,trg,aln; + read_data(argv[4],src); + read_data(argv[5],trg); + read_data(argv[6],aln); + fg = fg->add(src,trg,aln); + iter mfg(fg->I1.get()); + dump(cout,mfg); + exit(0); +} + + + From 28d64e23396cba53a83b75e01e9977db636e2285 Mon Sep 17 00:00:00 2001 From: Ulrich Germann Date: Wed, 9 Jul 2014 02:40:40 +0100 Subject: [PATCH 33/84] Simulated post-editing sessions feeding reference and alignment into the system after automatic translation of each source sentence. --- moses-cmd/simulate-pe.cc | 856 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 856 insertions(+) create mode 100644 moses-cmd/simulate-pe.cc diff --git a/moses-cmd/simulate-pe.cc b/moses-cmd/simulate-pe.cc new file mode 100644 index 0000000000..e88c1e4635 --- /dev/null +++ b/moses-cmd/simulate-pe.cc @@ -0,0 +1,856 @@ +// Fork of Main.cpp, to simulate post-editing sessions. +// Written by Ulrich Germann. + +/*********************************************************************** +Moses - factored phrase-based language decoder +Copyright (C) 2009 University of Edinburgh + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. 
+ +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +***********************************************************************/ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "util/usage.hh" +#include "util/exception.hh" +#include "moses/Util.h" +#include "moses/TranslationModel/UG/mmsapt.h" +#include "moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.h" + +#ifdef WIN32 +// Include Visual Leak Detector +//#include +#endif + +#include "TranslationAnalysis.h" +#include "IOWrapper.h" +#include "mbr.h" + +#include "moses/Hypothesis.h" +#include "moses/Manager.h" +#include "moses/StaticData.h" +#include "moses/Util.h" +#include "moses/Timer.h" +#include "moses/ThreadPool.h" +#include "moses/OutputCollector.h" +#include "moses/TranslationModel/PhraseDictionary.h" +#include "moses/FF/StatefulFeatureFunction.h" +#include "moses/FF/StatelessFeatureFunction.h" + +#ifdef HAVE_PROTOBUF +#include "hypergraph.pb.h" +#endif + +using namespace std; +using namespace Moses; +using namespace MosesCmd; + +namespace MosesCmd +{ +// output floats with five significant digits +static const size_t PRECISION = 3; + +/** Enforce rounding */ +void fix(std::ostream& stream, size_t size) +{ + stream.setf(std::ios::fixed); + stream.precision(size); +} + +/** Translates a sentence. 
+ * - calls the search (Manager) + * - applies the decision rule + * - outputs best translation and additional reporting + **/ +class TranslationTask : public Task +{ + +public: + + TranslationTask(size_t lineNumber, + InputType* source, + OutputCollector* outputCollector, + OutputCollector* nbestCollector, + OutputCollector* latticeSamplesCollector, + OutputCollector* wordGraphCollector, + OutputCollector* searchGraphCollector, + OutputCollector* detailedTranslationCollector, + OutputCollector* alignmentInfoCollector, + OutputCollector* unknownsCollector, + bool outputSearchGraphSLF, + bool outputSearchGraphHypergraph) + : m_source(source) + , m_lineNumber(lineNumber) + , m_outputCollector(outputCollector) + , m_nbestCollector(nbestCollector) + , m_latticeSamplesCollector(latticeSamplesCollector) + , m_wordGraphCollector(wordGraphCollector) + , m_searchGraphCollector(searchGraphCollector) + , m_detailedTranslationCollector(detailedTranslationCollector) + , m_alignmentInfoCollector(alignmentInfoCollector) + , m_unknownsCollector(unknownsCollector) + , m_outputSearchGraphSLF(outputSearchGraphSLF) + , m_outputSearchGraphHypergraph(outputSearchGraphHypergraph) + { } + + /** Translate one sentence + * gets called by main function implemented at end of this source file */ + void Run() { + // shorthand for "global data" + const StaticData &staticData = StaticData::Instance(); + + // input sentence + Sentence sentence; + + // report wall time spent on translation + Timer translationTime; + translationTime.start(); + + // report thread number +#if defined(WITH_THREADS) && defined(BOOST_HAS_PTHREADS) + TRACE_ERR("Translating line " << m_lineNumber << " in thread id " << pthread_self() << std::endl); +#endif + + + // execute the translation + // note: this executes the search, resulting in a search graph + // we still need to apply the decision rule (MAP, MBR, ...) 
+ Timer initTime; + initTime.start(); + Manager manager(m_lineNumber, *m_source,staticData.GetSearchAlgorithm()); + VERBOSE(1, "Line " << m_lineNumber << ": Initialize search took " << initTime << " seconds total" << endl); + manager.ProcessSentence(); + + // we are done with search, let's look what we got + Timer additionalReportingTime; + additionalReportingTime.start(); + + // output word graph + if (m_wordGraphCollector) { + ostringstream out; + fix(out,PRECISION); + manager.GetWordGraph(m_lineNumber, out); + m_wordGraphCollector->Write(m_lineNumber, out.str()); + } + + // output search graph + if (m_searchGraphCollector) { + ostringstream out; + fix(out,PRECISION); + manager.OutputSearchGraph(m_lineNumber, out); + m_searchGraphCollector->Write(m_lineNumber, out.str()); + +#ifdef HAVE_PROTOBUF + if (staticData.GetOutputSearchGraphPB()) { + ostringstream sfn; + sfn << staticData.GetParam("output-search-graph-pb")[0] << '/' << m_lineNumber << ".pb" << ends; + string fn = sfn.str(); + VERBOSE(2, "Writing search graph to " << fn << endl); + fstream output(fn.c_str(), ios::trunc | ios::binary | ios::out); + manager.SerializeSearchGraphPB(m_lineNumber, output); + } +#endif + } + + // Output search graph in HTK standard lattice format (SLF) + if (m_outputSearchGraphSLF) { + stringstream fileName; + fileName << staticData.GetParam("output-search-graph-slf")[0] << "/" << m_lineNumber << ".slf"; + std::ofstream *file = new std::ofstream; + file->open(fileName.str().c_str()); + if (file->is_open() && file->good()) { + ostringstream out; + fix(out,PRECISION); + manager.OutputSearchGraphAsSLF(m_lineNumber, out); + *file << out.str(); + file -> flush(); + } else { + TRACE_ERR("Cannot output HTK standard lattice for line " << m_lineNumber << " because the output file is not open or not ready for writing" << std::endl); + } + delete file; + } + + // Output search graph in hypergraph format for Kenneth Heafield's lazy hypergraph decoder + if (m_outputSearchGraphHypergraph) { + 
+ vector hypergraphParameters = staticData.GetParam("output-search-graph-hypergraph"); + + bool appendSuffix; + if (hypergraphParameters.size() > 0 && hypergraphParameters[0] == "true") { + appendSuffix = true; + } else { + appendSuffix = false; + } + + string compression; + if (hypergraphParameters.size() > 1) { + compression = hypergraphParameters[1]; + } else { + compression = "txt"; + } + + string hypergraphDir; + if ( hypergraphParameters.size() > 2 ) { + hypergraphDir = hypergraphParameters[2]; + } else { + string nbestFile = staticData.GetNBestFilePath(); + if ( ! nbestFile.empty() && nbestFile!="-" && !boost::starts_with(nbestFile,"/dev/stdout") ) { + boost::filesystem::path nbestPath(nbestFile); + + // In the Boost filesystem API version 2, + // which was the default prior to Boost 1.46, + // the filename() method returned a string. + // + // In the Boost filesystem API version 3, + // which is the default starting with Boost 1.46, + // the filename() method returns a path object. + // + // To get a string from the path object, + // the native() method must be called. + // hypergraphDir = nbestPath.parent_path().filename() + //#if BOOST_VERSION >= 104600 + // .native() + //#endif + //; + + // Hopefully the following compiles under all versions of Boost. + // + // If this line gives you compile errors, + // contact Lane Schwartz on the Moses mailing list + hypergraphDir = nbestPath.parent_path().string(); + + } else { + stringstream hypergraphDirName; + hypergraphDirName << boost::filesystem::current_path().string() << "/hypergraph"; + hypergraphDir = hypergraphDirName.str(); + } + } + + if ( ! boost::filesystem::exists(hypergraphDir) ) { + boost::filesystem::create_directory(hypergraphDir); + } + + if ( ! boost::filesystem::exists(hypergraphDir) ) { + TRACE_ERR("Cannot output hypergraphs to " << hypergraphDir << " because the directory does not exist" << std::endl); + } else if ( ! 
boost::filesystem::is_directory(hypergraphDir) ) { + TRACE_ERR("Cannot output hypergraphs to " << hypergraphDir << " because that path exists, but is not a directory" << std::endl); + } else { + stringstream fileName; + fileName << hypergraphDir << "/" << m_lineNumber; + if ( appendSuffix ) { + fileName << "." << compression; + } + boost::iostreams::filtering_ostream *file + = new boost::iostreams::filtering_ostream; + + if ( compression == "gz" ) { + file->push( boost::iostreams::gzip_compressor() ); + } else if ( compression == "bz2" ) { + file->push( boost::iostreams::bzip2_compressor() ); + } else if ( compression != "txt" ) { + TRACE_ERR("Unrecognized hypergraph compression format (" + << compression + << ") - using uncompressed plain txt" << std::endl); + compression = "txt"; + } + + file->push( boost::iostreams::file_sink(fileName.str(), ios_base::out) ); + + if (file->is_complete() && file->good()) { + fix(*file,PRECISION); + manager.OutputSearchGraphAsHypergraph(m_lineNumber, *file); + file -> flush(); + } else { + TRACE_ERR("Cannot output hypergraph for line " << m_lineNumber + << " because the output file " << fileName.str() + << " is not open or not ready for writing" + << std::endl); + } + file -> pop(); + delete file; + } + } + additionalReportingTime.stop(); + + // apply decision rule and output best translation(s) + if (m_outputCollector) { + ostringstream out; + ostringstream debug; + fix(debug,PRECISION); + + // all derivations - send them to debug stream + if (staticData.PrintAllDerivations()) { + additionalReportingTime.start(); + manager.PrintAllDerivations(m_lineNumber, debug); + additionalReportingTime.stop(); + } + + Timer decisionRuleTime; + decisionRuleTime.start(); + + // MAP decoding: best hypothesis + const Hypothesis* bestHypo = NULL; + if (!staticData.UseMBR()) { + bestHypo = manager.GetBestHypothesis(); + if (bestHypo) { + if (StaticData::Instance().GetOutputHypoScore()) { + out << bestHypo->GetTotalScore() << ' '; + } + if 
(staticData.IsPathRecoveryEnabled()) { + OutputInput(out, bestHypo); + out << "||| "; + } + if (staticData.GetParam("print-id").size() && Scan(staticData.GetParam("print-id")[0]) ) { + out << m_source->GetTranslationId() << " "; + } + + if (staticData.GetReportSegmentation() == 2) { + manager.GetOutputLanguageModelOrder(out, bestHypo); + } + OutputBestSurface( + out, + bestHypo, + staticData.GetOutputFactorOrder(), + staticData.GetReportSegmentation(), + staticData.GetReportAllFactors()); + if (staticData.PrintAlignmentInfo()) { + out << "||| "; + OutputAlignment(out, bestHypo); + } + + OutputAlignment(m_alignmentInfoCollector, m_lineNumber, bestHypo); + IFVERBOSE(1) { + debug << "BEST TRANSLATION: " << *bestHypo << endl; + } + } else { + VERBOSE(1, "NO BEST TRANSLATION" << endl); + } + + out << endl; + } + + // MBR decoding (n-best MBR, lattice MBR, consensus) + else { + // we first need the n-best translations + size_t nBestSize = staticData.GetMBRSize(); + if (nBestSize <= 0) { + cerr << "ERROR: negative size for number of MBR candidate translations not allowed (option mbr-size)" << endl; + exit(1); + } + TrellisPathList nBestList; + manager.CalcNBest(nBestSize, nBestList,true); + VERBOSE(2,"size of n-best: " << nBestList.GetSize() << " (" << nBestSize << ")" << endl); + IFVERBOSE(2) { + PrintUserTime("calculated n-best list for (L)MBR decoding"); + } + + // lattice MBR + if (staticData.UseLatticeMBR()) { + if (m_nbestCollector) { + //lattice mbr nbest + vector solutions; + size_t n = min(nBestSize, staticData.GetNBestSize()); + getLatticeMBRNBest(manager,nBestList,solutions,n); + ostringstream out; + OutputLatticeMBRNBest(out, solutions,m_lineNumber); + m_nbestCollector->Write(m_lineNumber, out.str()); + } else { + //Lattice MBR decoding + vector mbrBestHypo = doLatticeMBR(manager,nBestList); + OutputBestHypo(mbrBestHypo, m_lineNumber, staticData.GetReportSegmentation(), + staticData.GetReportAllFactors(),out); + IFVERBOSE(2) { + PrintUserTime("finished Lattice 
MBR decoding"); + } + } + } + + // consensus decoding + else if (staticData.UseConsensusDecoding()) { + const TrellisPath &conBestHypo = doConsensusDecoding(manager,nBestList); + OutputBestHypo(conBestHypo, m_lineNumber, + staticData.GetReportSegmentation(), + staticData.GetReportAllFactors(),out); + OutputAlignment(m_alignmentInfoCollector, m_lineNumber, conBestHypo); + IFVERBOSE(2) { + PrintUserTime("finished Consensus decoding"); + } + } + + // n-best MBR decoding + else { + const Moses::TrellisPath &mbrBestHypo = doMBR(nBestList); + OutputBestHypo(mbrBestHypo, m_lineNumber, + staticData.GetReportSegmentation(), + staticData.GetReportAllFactors(),out); + OutputAlignment(m_alignmentInfoCollector, m_lineNumber, mbrBestHypo); + IFVERBOSE(2) { + PrintUserTime("finished MBR decoding"); + } + } + } + + // report best translation to output collector + m_outputCollector->Write(m_lineNumber,out.str(),debug.str()); + + decisionRuleTime.stop(); + VERBOSE(1, "Line " << m_lineNumber << ": Decision rule took " << decisionRuleTime << " seconds total" << endl); + } + + additionalReportingTime.start(); + + // output n-best list + if (m_nbestCollector && !staticData.UseLatticeMBR()) { + TrellisPathList nBestList; + ostringstream out; + manager.CalcNBest(staticData.GetNBestSize(), nBestList,staticData.GetDistinctNBest()); + OutputNBest(out, nBestList, staticData.GetOutputFactorOrder(), m_lineNumber, + staticData.GetReportSegmentation()); + m_nbestCollector->Write(m_lineNumber, out.str()); + } + + //lattice samples + if (m_latticeSamplesCollector) { + TrellisPathList latticeSamples; + ostringstream out; + manager.CalcLatticeSamples(staticData.GetLatticeSamplesSize(), latticeSamples); + OutputNBest(out,latticeSamples, staticData.GetOutputFactorOrder(), m_lineNumber, + staticData.GetReportSegmentation()); + m_latticeSamplesCollector->Write(m_lineNumber, out.str()); + } + + // detailed translation reporting + if (m_detailedTranslationCollector) { + ostringstream out; + 
fix(out,PRECISION); + TranslationAnalysis::PrintTranslationAnalysis(out, manager.GetBestHypothesis()); + m_detailedTranslationCollector->Write(m_lineNumber,out.str()); + } + + //list of unknown words + if (m_unknownsCollector) { + const vector& unknowns = manager.getSntTranslationOptions()->GetUnknownSources(); + ostringstream out; + for (size_t i = 0; i < unknowns.size(); ++i) { + out << *(unknowns[i]); + } + out << endl; + m_unknownsCollector->Write(m_lineNumber, out.str()); + } + + // report additional statistics + manager.CalcDecoderStatistics(); + VERBOSE(1, "Line " << m_lineNumber << ": Additional reporting took " << additionalReportingTime << " seconds total" << endl); + VERBOSE(1, "Line " << m_lineNumber << ": Translation took " << translationTime << " seconds total" << endl); + IFVERBOSE(2) { + PrintUserTime("Sentence Decoding Time:"); + } + } + + ~TranslationTask() { + delete m_source; + } + +private: + InputType* m_source; + size_t m_lineNumber; + OutputCollector* m_outputCollector; + OutputCollector* m_nbestCollector; + OutputCollector* m_latticeSamplesCollector; + OutputCollector* m_wordGraphCollector; + OutputCollector* m_searchGraphCollector; + OutputCollector* m_detailedTranslationCollector; + OutputCollector* m_alignmentInfoCollector; + OutputCollector* m_unknownsCollector; + bool m_outputSearchGraphSLF; + bool m_outputSearchGraphHypergraph; + std::ofstream *m_alignmentStream; + + +}; + +static void PrintFeatureWeight(const FeatureFunction* ff) +{ + cout << ff->GetScoreProducerDescription() << "="; + size_t numScoreComps = ff->GetNumScoreComponents(); + vector values = StaticData::Instance().GetAllWeights().GetScoresForProducer(ff); + for (size_t i = 0; i < numScoreComps; ++i) { + cout << " " << values[i]; + } + cout << endl; +} + +static void ShowWeights() +{ + //TODO: Find a way of ensuring this order is synced with the nbest + fix(cout,6); + const vector& slf = StatelessFeatureFunction::GetStatelessFeatureFunctions(); + const vector& sff = 
StatefulFeatureFunction::GetStatefulFeatureFunctions(); + + for (size_t i = 0; i < sff.size(); ++i) { + const StatefulFeatureFunction *ff = sff[i]; + if (ff->IsTuneable()) { + PrintFeatureWeight(ff); + } + else { + cout << ff->GetScoreProducerDescription() << " UNTUNEABLE" << endl; + } + } + for (size_t i = 0; i < slf.size(); ++i) { + const StatelessFeatureFunction *ff = slf[i]; + if (ff->IsTuneable()) { + PrintFeatureWeight(ff); + } + else { + cout << ff->GetScoreProducerDescription() << " UNTUNEABLE" << endl; + } + } +} + +size_t OutputFeatureWeightsForHypergraph(size_t index, const FeatureFunction* ff, std::ostream &outputSearchGraphStream) +{ + size_t numScoreComps = ff->GetNumScoreComponents(); + if (numScoreComps != 0) { + vector values = StaticData::Instance().GetAllWeights().GetScoresForProducer(ff); + if (numScoreComps > 1) { + for (size_t i = 0; i < numScoreComps; ++i) { + outputSearchGraphStream << ff->GetScoreProducerDescription() + << i + << "=" << values[i] << endl; + } + } else { + outputSearchGraphStream << ff->GetScoreProducerDescription() + << "=" << values[0] << endl; + } + return index+numScoreComps; + } else { + UTIL_THROW2("Sparse features are not yet supported when outputting hypergraph format"); + } +} + +void OutputFeatureWeightsForHypergraph(std::ostream &outputSearchGraphStream) +{ + outputSearchGraphStream.setf(std::ios::fixed); + outputSearchGraphStream.precision(6); + + const vector& slf =StatelessFeatureFunction::GetStatelessFeatureFunctions(); + const vector& sff = StatefulFeatureFunction::GetStatefulFeatureFunctions(); + size_t featureIndex = 1; + for (size_t i = 0; i < sff.size(); ++i) { + featureIndex = OutputFeatureWeightsForHypergraph(featureIndex, sff[i], outputSearchGraphStream); + } + for (size_t i = 0; i < slf.size(); ++i) { + /* + if (slf[i]->GetScoreProducerWeightShortName() != "u" && + slf[i]->GetScoreProducerWeightShortName() != "tm" && + slf[i]->GetScoreProducerWeightShortName() != "I" && + 
slf[i]->GetScoreProducerWeightShortName() != "g") + */ + { + featureIndex = OutputFeatureWeightsForHypergraph(featureIndex, slf[i], outputSearchGraphStream); + } + } + const vector& pds = PhraseDictionary::GetColl(); + for( size_t i=0; i& gds = GenerationDictionary::GetColl(); + for( size_t i=0; i > argfilter(4); + argfilter[0] = std::make_pair(string("--spe-src"),1); + argfilter[1] = std::make_pair(string("--spe-trg"),1); + argfilter[2] = std::make_pair(string("--spe-aln"),1); + + char** my_args; int my_acnt; + char** mo_args; int mo_acnt; + filter_arguments(argc, argv, mo_acnt, &mo_args, my_acnt, &my_args, argfilter); + + ifstream spe_src,spe_trg,spe_aln; + // instead of translating show coverage by phrase tables + for (int i = 0; i < my_acnt; i += 2) + { + if (!strcmp(my_args[i],"--spe-src")) + spe_src.open(my_args[i+1]); + else if (!strcmp(my_args[i],"--spe-trg")) + spe_trg.open(my_args[i+1]); + else if (!strcmp(my_args[i],"--spe-aln")) + spe_aln.open(my_args[i+1]); + } + + // load all the settings into the Parameter class + // (stores them as strings, or array of strings) + Parameter params; + if (!params.LoadParam(mo_acnt,mo_args)) { + exit(1); + } + + + // initialize all "global" variables, which are stored in StaticData + // note: this also loads models such as the language model, etc. 
+ if (!StaticData::LoadDataStatic(¶ms, argv[0])) { + exit(1); + } + + // setting "-show-weights" -> just dump out weights and exit + if (params.isParamSpecified("show-weights")) { + ShowWeights(); + exit(0); + } + + // shorthand for accessing information in StaticData + const StaticData& staticData = StaticData::Instance(); + + + //initialise random numbers + srand(time(NULL)); + + // set up read/writing class + IOWrapper* ioWrapper = GetIOWrapper(staticData); + if (!ioWrapper) { + cerr << "Error; Failed to create IO object" << endl; + exit(1); + } + + // check on weights + const ScoreComponentCollection& weights = staticData.GetAllWeights(); + IFVERBOSE(2) { + TRACE_ERR("The global weight vector looks like this: "); + TRACE_ERR(weights); + TRACE_ERR("\n"); + } + if (staticData.GetOutputSearchGraphHypergraph()) { + ofstream* weightsOut = new std::ofstream; + stringstream weightsFilename; + if (staticData.GetParam("output-search-graph-hypergraph").size() > 3) { + weightsFilename << staticData.GetParam("output-search-graph-hypergraph")[3]; + } else { + string nbestFile = staticData.GetNBestFilePath(); + if ( ! nbestFile.empty() && nbestFile!="-" && !boost::starts_with(nbestFile,"/dev/stdout") ) { + boost::filesystem::path nbestPath(nbestFile); + weightsFilename << nbestPath.parent_path().filename() << "/weights"; + } else { + weightsFilename << boost::filesystem::current_path().string() << "/hypergraph/weights"; + } + } + boost::filesystem::path weightsFilePath(weightsFilename.str()); + if ( ! 
boost::filesystem::exists(weightsFilePath.parent_path()) ) { + boost::filesystem::create_directory(weightsFilePath.parent_path()); + } + TRACE_ERR("The weights file is " << weightsFilename.str() << "\n"); + weightsOut->open(weightsFilename.str().c_str()); + OutputFeatureWeightsForHypergraph(*weightsOut); + weightsOut->flush(); + weightsOut->close(); + delete weightsOut; + } + + + // initialize output streams + // note: we can't just write to STDOUT or files + // because multithreading may return sentences in shuffled order + auto_ptr outputCollector; // for translations + auto_ptr nbestCollector; // for n-best lists + auto_ptr latticeSamplesCollector; //for lattice samples + auto_ptr nbestOut; + auto_ptr latticeSamplesOut; + size_t nbestSize = staticData.GetNBestSize(); + string nbestFile = staticData.GetNBestFilePath(); + bool output1best = true; + if (nbestSize) { + if (nbestFile == "-" || nbestFile == "/dev/stdout") { + // nbest to stdout, no 1-best + nbestCollector.reset(new OutputCollector()); + output1best = false; + } else { + // nbest to file, 1-best to stdout + nbestOut.reset(new ofstream(nbestFile.c_str())); + if (!nbestOut->good()) { + TRACE_ERR("ERROR: Failed to open " << nbestFile << " for nbest lists" << endl); + exit(1); + } + nbestCollector.reset(new OutputCollector(nbestOut.get())); + } + } + size_t latticeSamplesSize = staticData.GetLatticeSamplesSize(); + string latticeSamplesFile = staticData.GetLatticeSamplesFilePath(); + if (latticeSamplesSize) { + if (latticeSamplesFile == "-" || latticeSamplesFile == "/dev/stdout") { + latticeSamplesCollector.reset(new OutputCollector()); + output1best = false; + } else { + latticeSamplesOut.reset(new ofstream(latticeSamplesFile.c_str())); + if (!latticeSamplesOut->good()) { + TRACE_ERR("ERROR: Failed to open " << latticeSamplesFile << " for lattice samples" << endl); + exit(1); + } + latticeSamplesCollector.reset(new OutputCollector(latticeSamplesOut.get())); + } + } + if (output1best) { + 
outputCollector.reset(new OutputCollector()); + } + + // initialize stream for word graph (aka: output lattice) + auto_ptr wordGraphCollector; + if (staticData.GetOutputWordGraph()) { + wordGraphCollector.reset(new OutputCollector(&(ioWrapper->GetOutputWordGraphStream()))); + } + + // initialize stream for search graph + // note: this is essentially the same as above, but in a different format + auto_ptr searchGraphCollector; + if (staticData.GetOutputSearchGraph()) { + searchGraphCollector.reset(new OutputCollector(&(ioWrapper->GetOutputSearchGraphStream()))); + } + + // initialize stram for details about the decoder run + auto_ptr detailedTranslationCollector; + if (staticData.IsDetailedTranslationReportingEnabled()) { + detailedTranslationCollector.reset(new OutputCollector(&(ioWrapper->GetDetailedTranslationReportingStream()))); + } + + // initialize stram for word alignment between input and output + auto_ptr alignmentInfoCollector; + if (!staticData.GetAlignmentOutputFile().empty()) { + alignmentInfoCollector.reset(new OutputCollector(ioWrapper->GetAlignmentOutputStream())); + } + + //initialise stream for unknown (oov) words + auto_ptr unknownsCollector; + auto_ptr unknownsStream; + if (!staticData.GetOutputUnknownsFile().empty()) { + unknownsStream.reset(new ofstream(staticData.GetOutputUnknownsFile().c_str())); + if (!unknownsStream->good()) { + TRACE_ERR("Unable to open " << staticData.GetOutputUnknownsFile() << " for unknowns"); + exit(1); + } + unknownsCollector.reset(new OutputCollector(unknownsStream.get())); + } + +#ifdef WITH_THREADS + ThreadPool pool(staticData.ThreadCount()); +#endif + + // main loop over set of input sentences + InputType* source = NULL; + size_t lineCount = staticData.GetStartTranslationId(); + while(ReadInput(*ioWrapper,staticData.GetInputType(),source)) { + IFVERBOSE(1) { + ResetUserTime(); + } + // set up task of translating one sentence + TranslationTask* task = + new TranslationTask(lineCount,source, outputCollector.get(), 
+ nbestCollector.get(), + latticeSamplesCollector.get(), + wordGraphCollector.get(), + searchGraphCollector.get(), + detailedTranslationCollector.get(), + alignmentInfoCollector.get(), + unknownsCollector.get(), + staticData.GetOutputSearchGraphSLF(), + staticData.GetOutputSearchGraphHypergraph()); + // execute task +#ifdef WITH_THREADS + if (my_acnt) + { + task->Run(); + delete task; + string src,trg,aln; + UTIL_THROW_IF2(!getline(spe_src,src), "[" << HERE << "] " + << "missing update data for simulated post-editing."); + UTIL_THROW_IF2(!getline(spe_trg,trg), "[" << HERE << "] " + << "missing update data for simulated post-editing."); + UTIL_THROW_IF2(!getline(spe_aln,aln), "[" << HERE << "] " + << "missing update data for simulated post-editing."); + BOOST_FOREACH (PhraseDictionary* pd, PhraseDictionary::GetColl()) + { + Mmsapt* sapt = dynamic_cast(pd); + if (sapt) sapt->add(src,trg,aln); + VERBOSE(1,"[" << HERE << " added src] " << src << endl); + VERBOSE(1,"[" << HERE << " added trg] " << trg << endl); + VERBOSE(1,"[" << HERE << " added aln] " << aln << endl); + } + } + else pool.Submit(task); +#else + task->Run(); + delete task; +#endif + + source = NULL; //make sure it doesn't get deleted + ++lineCount; + } + + // we are done, finishing up +#ifdef WITH_THREADS + pool.Stop(true); //flush remaining jobs +#endif + + delete ioWrapper; + FeatureFunction::Destroy(); + + } catch (const std::exception &e) { + std::cerr << "Exception: " << e.what() << std::endl; + return EXIT_FAILURE; + } + + IFVERBOSE(1) util::PrintUsage(std::cerr); + +#ifndef EXIT_RETURN + //This avoids that destructors are called (it can take a long time) + exit(EXIT_SUCCESS); +#else + return EXIT_SUCCESS; +#endif +} From 4d41211c2cd6eb75c5a229c10e98fdfa1acff3b4 Mon Sep 17 00:00:00 2001 From: Ulrich Germann Date: Wed, 9 Jul 2014 02:41:28 +0100 Subject: [PATCH 34/84] Major overhaul of Mmsapt. Reorganization of old and addition of new features in phrase tables. Many critical bug fixes. 
--- .gitignore | 1 + Jamroot | 4 +- OnDiskPt/queryOnDiskPt.cpp | 2 +- contrib/server/mosesserver.cpp | 39 +- moses-cmd/Jamfile | 9 +- moses/BitmapContainer.cpp | 14 +- moses/Manager.cpp | 4 +- moses/TranslationModel/UG/Jamfile | 35 +- .../program_options/ug_splice_arglist.cc | 50 + .../program_options/ug_splice_arglist.h | 18 + moses/TranslationModel/UG/mm/Jamfile | 19 +- moses/TranslationModel/UG/mm/custom-pt.cc | 9 +- moses/TranslationModel/UG/mm/ug_bitext.cc | 183 +-- moses/TranslationModel/UG/mm/ug_bitext.h | 39 +- moses/TranslationModel/UG/mm/ug_im_ttrack.h | 34 +- .../UG/mm/ug_lexical_phrase_scorer2.h | 21 +- moses/TranslationModel/UG/mm/ug_phrasepair.cc | 97 ++ moses/TranslationModel/UG/mm/ug_phrasepair.h | 243 ++++ .../UG/mm/ug_tsa_tree_iterator.h | 46 +- moses/TranslationModel/UG/mmsapt.cpp | 1034 ++++++++++------- moses/TranslationModel/UG/mmsapt.h | 87 +- moses/TranslationModel/UG/mmsapt_align.cc | 607 +++++----- .../UG/mmsapt_phrase_scorers.h | 269 +---- moses/TranslationModel/UG/ptable-lookup.cc | 14 +- moses/TranslationModel/UG/sapt_phrase_key.h | 13 + .../TranslationModel/UG/sapt_phrase_scorers.h | 12 + moses/TranslationModel/UG/sapt_pscore_base.h | 103 ++ .../UG/sapt_pscore_coherence.h | 33 + moses/TranslationModel/UG/sapt_pscore_lex1.h | 70 ++ .../TranslationModel/UG/sapt_pscore_logcnt.h | 65 ++ moses/TranslationModel/UG/sapt_pscore_pbwd.h | 58 + moses/TranslationModel/UG/sapt_pscore_pfwd.h | 70 ++ .../UG/sapt_pscore_provenance.h | 47 + .../UG/sapt_pscore_rareness.h | 41 + .../UG/sapt_pscore_unaligned.h | 67 ++ moses/TranslationModel/UG/sim-pe.cc | 83 ++ moses/TranslationModel/UG/try-align.cc | 47 +- .../fuzzy-match/FuzzyMatchWrapper.cpp | 4 +- moses/TypeDef.h | 6 +- moses/Util.h | 4 + scripts/server/moses.py | 10 +- scripts/server/sim-pe.py | 57 +- 42 files changed, 2365 insertions(+), 1303 deletions(-) create mode 100644 moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.cc create mode 100644 
moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.h create mode 100644 moses/TranslationModel/UG/mm/ug_phrasepair.cc create mode 100644 moses/TranslationModel/UG/mm/ug_phrasepair.h create mode 100644 moses/TranslationModel/UG/sapt_phrase_key.h create mode 100644 moses/TranslationModel/UG/sapt_phrase_scorers.h create mode 100644 moses/TranslationModel/UG/sapt_pscore_base.h create mode 100644 moses/TranslationModel/UG/sapt_pscore_coherence.h create mode 100644 moses/TranslationModel/UG/sapt_pscore_lex1.h create mode 100644 moses/TranslationModel/UG/sapt_pscore_logcnt.h create mode 100644 moses/TranslationModel/UG/sapt_pscore_pbwd.h create mode 100644 moses/TranslationModel/UG/sapt_pscore_pfwd.h create mode 100644 moses/TranslationModel/UG/sapt_pscore_provenance.h create mode 100644 moses/TranslationModel/UG/sapt_pscore_rareness.h create mode 100644 moses/TranslationModel/UG/sapt_pscore_unaligned.h create mode 100644 moses/TranslationModel/UG/sim-pe.cc diff --git a/.gitignore b/.gitignore index f870bed033..e7c37d86c6 100644 --- a/.gitignore +++ b/.gitignore @@ -79,3 +79,4 @@ nbproject/ mingw/MosesGUI/MosesGUI.e4p mingw/MosesGUI/_eric4project/ +contrib/m4m/merge-sorted diff --git a/Jamroot b/Jamroot index 283b4dd6f9..79ec39940f 100644 --- a/Jamroot +++ b/Jamroot @@ -152,13 +152,15 @@ build-projects lm util phrase-extract search moses moses/LM mert moses-cmd moses if [ option.get "with-mm" : : "yes" ] { alias mm : + moses/TranslationModel/UG//spe-check-coverage2 moses/TranslationModel/UG//ptable-lookup + moses/TranslationModel/UG//sim-pe + moses/TranslationModel/UG//spe-check-coverage moses/TranslationModel/UG/mm//mtt-build moses/TranslationModel/UG/mm//mtt-dump moses/TranslationModel/UG/mm//symal2mam moses/TranslationModel/UG/mm//mam2symal moses/TranslationModel/UG/mm//mam_verify - moses/TranslationModel/UG/mm//custom-pt moses/TranslationModel/UG/mm//mmlex-build moses/TranslationModel/UG/mm//mmlex-lookup moses/TranslationModel/UG/mm//mtt-count-words 
diff --git a/OnDiskPt/queryOnDiskPt.cpp b/OnDiskPt/queryOnDiskPt.cpp index a38fc5435f..77576d9565 100644 --- a/OnDiskPt/queryOnDiskPt.cpp +++ b/OnDiskPt/queryOnDiskPt.cpp @@ -22,7 +22,7 @@ int main(int argc, char **argv) { int tableLimit = 20; std::string ttable = ""; - bool useAlignments = false; + // bool useAlignments = false; for(int i = 1; i < argc; i++) { if(!strcmp(argv[i], "-tlimit")) { diff --git a/contrib/server/mosesserver.cpp b/contrib/server/mosesserver.cpp index 1ff11f0ae2..f14111f331 100644 --- a/contrib/server/mosesserver.cpp +++ b/contrib/server/mosesserver.cpp @@ -4,6 +4,7 @@ #include +#include "moses/Util.h" #include "moses/ChartManager.h" #include "moses/Hypothesis.h" #include "moses/Manager.h" @@ -59,7 +60,7 @@ class Updater: public xmlrpc_c::method if(add2ORLM_) { //updateORLM(); } - cerr << "Done inserting\n"; + XVERBOSE(1,"Done inserting\n"); //PhraseDictionary* pdsa = (PhraseDictionary*) pdf->GetDictionary(*dummy); map retData; //*retvalP = xmlrpc_c::value_struct(retData); @@ -120,17 +121,17 @@ class Updater: public xmlrpc_c::method if(si == params.end()) throw xmlrpc_c::fault("Missing source sentence", xmlrpc_c::fault::CODE_PARSE); source_ = xmlrpc_c::value_string(si->second); - cerr << "source = " << source_ << endl; + XVERBOSE(1,"source = " << source_ << endl); si = params.find("target"); if(si == params.end()) throw xmlrpc_c::fault("Missing target sentence", xmlrpc_c::fault::CODE_PARSE); target_ = xmlrpc_c::value_string(si->second); - cerr << "target = " << target_ << endl; + XVERBOSE(1,"target = " << target_ << endl); si = params.find("alignment"); if(si == params.end()) throw xmlrpc_c::fault("Missing alignment", xmlrpc_c::fault::CODE_PARSE); alignment_ = xmlrpc_c::value_string(si->second); - cerr << "alignment = " << alignment_ << endl; + XVERBOSE(1,"alignment = " << alignment_ << endl); si = params.find("bounded"); bounded_ = (si != params.end()); si = params.find("updateORLM"); @@ -224,7 +225,7 @@ class Translator : public 
xmlrpc_c::method } const string source((xmlrpc_c::value_string(si->second))); - cerr << "Input: " << source << endl; + XVERBOSE(1,"Input: " << source << endl); si = params.find("align"); bool addAlignInfo = (si != params.end()); si = params.find("word-align"); @@ -287,13 +288,13 @@ class Translator : public xmlrpc_c::method } } else { Sentence sentence; - const vector &inputFactorOrder = - staticData.GetInputFactorOrder(); + const vector & + inputFactorOrder = staticData.GetInputFactorOrder(); stringstream in(source + "\n"); sentence.Read(in,inputFactorOrder); size_t lineNumber = 0; // TODO: Include sentence request number here? Manager manager(lineNumber, sentence, staticData.GetSearchAlgorithm()); - manager.ProcessSentence(); + manager.ProcessSentence(); const Hypothesis* hypo = manager.GetBestHypothesis(); vector alignInfo; @@ -331,7 +332,7 @@ class Translator : public xmlrpc_c::method pair text("text", xmlrpc_c::value_string(out.str())); retData.insert(text); - cerr << "Output: " << out.str() << endl; + XVERBOSE(1,"Output: " << out.str() << endl); *retvalP = xmlrpc_c::value_struct(retData); } @@ -574,7 +575,7 @@ int main(int argc, char** argv) { //Extract port and log, send other args to moses - char** mosesargv = new char*[argc+2]; + char** mosesargv = new char*[argc+2]; // why "+2" [UG] int mosesargc = 0; int port = 8080; const char* logfile = "/dev/null"; @@ -634,11 +635,11 @@ int main(int argc, char** argv) myRegistry.addMethod("updater", updater); myRegistry.addMethod("optimize", optimizer); - xmlrpc_c::serverAbyss myAbyssServer( - myRegistry, - port, // TCP port on which to listen - logfile - ); + xmlrpc_c::serverAbyss myAbyssServer( + myRegistry, + port, // TCP port on which to listen + logfile + ); /* doesn't work with xmlrpc-c v. 
1.16.33 - ie very old lib on Ubuntu 12.04 xmlrpc_c::serverAbyss myAbyssServer( xmlrpc_c::serverAbyss::constrOpt() @@ -648,12 +649,10 @@ int main(int argc, char** argv) .allowOrigin("*") ); */ - - cerr << "Listening on port " << port << endl; + + XVERBOSE(1,"Listening on port " << port << endl); if (isSerial) { - while(1) { - myAbyssServer.runOnce(); - } + while(1) myAbyssServer.runOnce(); } else { myAbyssServer.run(); } diff --git a/moses-cmd/Jamfile b/moses-cmd/Jamfile index bddc109110..d257cd26cf 100644 --- a/moses-cmd/Jamfile +++ b/moses-cmd/Jamfile @@ -3,4 +3,11 @@ alias deps : IOWrapper.cpp mbr.cpp LatticeMBR.cpp TranslationAnalysis.cpp ..//z exe moses : Main.cpp deps ; exe lmbrgrid : LatticeMBRGrid.cpp deps ; -alias programs : moses lmbrgrid ; +exe simulate-pe : +simulate-pe.cc +$(TOP)/moses/TranslationModel/UG/generic//generic +$(TOP)//boost_program_options +deps +; + +alias programs : moses lmbrgrid simulate-pe ; diff --git a/moses/BitmapContainer.cpp b/moses/BitmapContainer.cpp index 981b04895a..ee2d55fc8b 100644 --- a/moses/BitmapContainer.cpp +++ b/moses/BitmapContainer.cpp @@ -161,13 +161,17 @@ BackwardsEdge::BackwardsEdge(const BitmapContainer &prevBitmapContainer } if (m_translations.size() > 1) { - UTIL_THROW_IF2(m_translations.Get(0)->GetFutureScore() < m_translations.Get(1)->GetFutureScore(), - "Non-monotonic future score"); + UTIL_THROW_IF2(m_translations.Get(0)->GetFutureScore() < m_translations.Get(1)->GetFutureScore(), + "Non-monotonic future score: " + << m_translations.Get(0)->GetFutureScore() << " vs. " + << m_translations.Get(1)->GetFutureScore()); } if (m_hypotheses.size() > 1) { UTIL_THROW_IF2(m_hypotheses[0]->GetTotalScore() < m_hypotheses[1]->GetTotalScore(), - "Non-monotonic total score"); + "Non-monotonic total score" + << m_hypotheses[0]->GetTotalScore() << " vs. 
" + << m_hypotheses[1]->GetTotalScore()); } HypothesisScoreOrdererWithDistortion orderer (&transOptRange); @@ -442,7 +446,9 @@ BitmapContainer::ProcessBestHypothesis() if (!Empty()) { HypothesisQueueItem *check = Dequeue(true); UTIL_THROW_IF2(item->GetHypothesis()->GetTotalScore() < check->GetHypothesis()->GetTotalScore(), - "Non-monotonic total score"); + "Non-monotonic total score: " + << item->GetHypothesis()->GetTotalScore() << " vs. " + << check->GetHypothesis()->GetTotalScore()); } // Logging for the criminally insane diff --git a/moses/Manager.cpp b/moses/Manager.cpp index 6bc82378ea..196f4d9971 100644 --- a/moses/Manager.cpp +++ b/moses/Manager.cpp @@ -105,7 +105,9 @@ void Manager::ProcessSentence() // some reporting on how long this took IFVERBOSE(1) { GetSentenceStats().StopTimeCollectOpts(); - TRACE_ERR("Line "<< m_lineNumber << ": Collecting options took " << GetSentenceStats().GetTimeCollectOpts() << " seconds" << endl); + TRACE_ERR("Line "<< m_lineNumber << ": Collecting options took " + << GetSentenceStats().GetTimeCollectOpts() << " seconds at " + << __FILE__ << ":" << __LINE__ << endl); } // search for best translation with the specified algorithm diff --git a/moses/TranslationModel/UG/Jamfile b/moses/TranslationModel/UG/Jamfile index ecd175a653..c36d4a072b 100644 --- a/moses/TranslationModel/UG/Jamfile +++ b/moses/TranslationModel/UG/Jamfile @@ -20,6 +20,39 @@ $(TOP)/moses/TranslationModel/UG//mmsapt $(TOP)/util//kenutil ; +exe sim-pe : +sim-pe.cc +$(TOP)/moses//moses +$(TOP)/moses/TranslationModel/UG/generic//generic +$(TOP)//boost_iostreams +$(TOP)//boost_program_options +$(TOP)/moses/TranslationModel/UG/mm//mm +$(TOP)/moses/TranslationModel/UG//mmsapt +$(TOP)/util//kenutil +; + +exe spe-check-coverage : +spe-check-coverage.cc +$(TOP)/moses//moses +$(TOP)/moses/TranslationModel/UG/generic//generic +$(TOP)//boost_iostreams +$(TOP)//boost_program_options +$(TOP)/moses/TranslationModel/UG/mm//mm +$(TOP)/moses/TranslationModel/UG//mmsapt 
+$(TOP)/util//kenutil +; + +exe spe-check-coverage2 : +spe-check-coverage2.cc +$(TOP)/moses//moses +$(TOP)/moses/TranslationModel/UG/generic//generic +$(TOP)//boost_iostreams +$(TOP)//boost_program_options +$(TOP)/moses/TranslationModel/UG/mm//mm +$(TOP)/moses/TranslationModel/UG//mmsapt +$(TOP)/util//kenutil +; + install $(PREFIX)/bin : try-align ; -fakelib mmsapt : [ glob *.cpp mmsapt*.cc ] ; +fakelib mmsapt : [ glob *.cpp mmsapt*.cc sapt*.cc ] ; diff --git a/moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.cc b/moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.cc new file mode 100644 index 0000000000..7dc2cd18f0 --- /dev/null +++ b/moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.cc @@ -0,0 +1,50 @@ +//-*- c++ -*- +#include "ug_splice_arglist.h" +#include "moses/Util.h" +#include "util/exception.hh" +#include + +namespace Moses { + + void + filter_arguments(int const argc_in, char const* const* const argv_in, + int & argc_moses, char*** argv_moses, + int & argc_other, char*** argv_other, + vector > const& filter) + { + *argv_moses = new char*[argc_in]; + *argv_other = new char*[argc_in]; + (*argv_moses)[0] = new char[strlen(argv_in[0])+1]; + strcpy((*argv_moses)[0], argv_in[0]); + argc_moses = 1; + argc_other = 0; + typedef pair option; + int i = 1; + while (i < argc_in) + { + BOOST_FOREACH(option const& o, filter) + { + if (o.first == argv_in[i]) + { + (*argv_other)[argc_other] = new char[strlen(argv_in[i])+1]; + strcpy((*argv_other)[argc_other++],argv_in[i]); + for (int k = 0; k < o.second; ++k) + { + UTIL_THROW_IF2(++i >= argc_in || argv_in[i][0] == '-', + "[" << HERE << "] Missing argument for " + << "parameter " << o.first << "!"); + (*argv_other)[argc_other] = new char[strlen(argv_in[i])+1]; + strcpy((*argv_other)[argc_other++],argv_in[i]); + } + if (++i >= argc_in) break; + } + } + if (i >= argc_in) break; + (*argv_moses)[argc_moses] = new char[strlen(argv_in[i])+1]; + 
strcpy((*argv_moses)[argc_moses++], argv_in[i++]); + } + } + +} // namespace Moses + + diff --git a/moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.h b/moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.h new file mode 100644 index 0000000000..e56585e8ab --- /dev/null +++ b/moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.h @@ -0,0 +1,18 @@ +//-*- c++ -*- +#pragma once +#include +#include +namespace Moses { + using namespace std; + + // Function to splice the argument list (e.g. before handing it over to + // Moses LoadParam() function. /filter/ is a vector of argument names + // and the number of arguments after each of them + void + filter_arguments(int const argc_in, char const* const* const argv_in, + int & argc_moses, char*** argv_moses, + int & argc_other, char*** argv_other, + vector > const& filter); + + +} // namespace Moses diff --git a/moses/TranslationModel/UG/mm/Jamfile b/moses/TranslationModel/UG/mm/Jamfile index 2cc923581f..8d8af050a2 100644 --- a/moses/TranslationModel/UG/mm/Jamfile +++ b/moses/TranslationModel/UG/mm/Jamfile @@ -72,15 +72,15 @@ $(TOP)/moses/TranslationModel/UG/mm//mm $(TOP)/util//kenutil ; -exe custom-pt : -custom-pt.cc -$(TOP)/moses//moses -$(TOP)//boost_iostreams -$(TOP)//boost_program_options -$(TOP)/moses/TranslationModel/UG/mm//mm -$(TOP)/moses/TranslationModel/UG/generic//generic -$(TOP)/util//kenutil -; +# exe custom-pt : +# custom-pt.cc +# $(TOP)/moses//moses +# $(TOP)//boost_iostreams +# $(TOP)//boost_program_options +# $(TOP)/moses/TranslationModel/UG/mm//mm +# $(TOP)/moses/TranslationModel/UG/generic//generic +# $(TOP)/util//kenutil +# ; exe calc-coverage : @@ -98,7 +98,6 @@ mtt-dump mtt-count-words symal2mam mam2symal -custom-pt mmlex-build mmlex-lookup mam_verify diff --git a/moses/TranslationModel/UG/mm/custom-pt.cc b/moses/TranslationModel/UG/mm/custom-pt.cc index 1c1e0893c4..e52772b484 100644 --- a/moses/TranslationModel/UG/mm/custom-pt.cc +++ 
b/moses/TranslationModel/UG/mm/custom-pt.cc @@ -1,6 +1,6 @@ // build a phrase table for the given input // #include "ug_lexical_phrase_scorer2.h" - +#if 0 #include #include #include @@ -25,7 +25,7 @@ #include "ug_bitext.h" #include "../mmsapt_phrase_scorers.h" #include "ug_lexical_phrase_scorer2.h" - +#include "../sapt_phrase_scorers.h" using namespace std; using namespace ugdiss; using namespace Moses; @@ -109,6 +109,7 @@ nbest_phrasepairs(uint64_t const pid1, int main(int argc, char* argv[]) { // assert(argc == 4); +#if 0 #if 0 string base = argv[1]; string L1 = argv[2]; @@ -182,7 +183,7 @@ int main(int argc, char* argv[]) } } } - +#endif exit(0); } - +#endif diff --git a/moses/TranslationModel/UG/mm/ug_bitext.cc b/moses/TranslationModel/UG/mm/ug_bitext.cc index 8dbbdcb926..a1a6dff7bf 100644 --- a/moses/TranslationModel/UG/mm/ug_bitext.cc +++ b/moses/TranslationModel/UG/mm/ug_bitext.cc @@ -158,99 +158,25 @@ namespace Moses jstats:: invalidate() { - my_rcnt = 0; + if (my_wcnt > 0) + my_wcnt *= -1; } - bool + void jstats:: - valid() - { - return my_rcnt != 0; - } - - bool - PhrasePair:: - operator<=(PhrasePair const& other) const + validate() { - return this->score <= other.score; + if (my_wcnt < 0) + my_wcnt *= -1; } bool - PhrasePair:: - operator>=(PhrasePair const& other) const - { - return this->score >= other.score; - } - - bool - PhrasePair:: - operator<(PhrasePair const& other) const - { - return this->score < other.score; - } - - bool - PhrasePair:: - operator>(PhrasePair const& other) const - { - return this->score > other.score; - } - - PhrasePair:: - PhrasePair() {} - - PhrasePair:: - PhrasePair(PhrasePair const& o) - : p1(o.p1), - p2(o.p2), - raw1(o.raw1), - raw2(o.raw2), - sample1(o.sample1), - sample2(o.sample2), - good1(o.good1), - good2(o.good2), - joint(o.joint), - fvals(o.fvals), - aln(o.aln), - score(o.score) - { - for (size_t i = 0; i <= po_other; ++i) - { - dfwd[i] = o.dfwd[i]; - dbwd[i] = o.dbwd[i]; - } - } - - void - PhrasePair:: - 
init(uint64_t const pid1, pstats const& ps, size_t const numfeats) + jstats:: + valid() { - p1 = pid1; - p2 = 0; - raw1 = ps.raw_cnt; - sample1 = ps.sample_cnt; - sample2 = 0; - good1 = ps.good; - good2 = 0; - raw2 = 0; - fvals.resize(numfeats); + return my_wcnt >= 0; } - void - PhrasePair:: - init(uint64_t const pid1, - pstats const& ps1, - pstats const& ps2, - size_t const numfeats) - { - p1 = pid1; - raw1 = ps1.raw_cnt + ps2.raw_cnt; - sample1 = ps1.sample_cnt + ps2.sample_cnt; - sample2 = 0; - good1 = ps1.good + ps2.good; - good2 = 0; - fvals.resize(numfeats); - } float lbop(size_t const tries, size_t const succ, float const confidence) @@ -261,85 +187,6 @@ namespace Moses find_lower_bound_on_p(tries, succ, confidence))); } - PhrasePair const& - PhrasePair:: - update(uint64_t const pid2, jstats const& js) - { - p2 = pid2; - raw2 = js.cnt2(); - joint = js.rcnt(); - assert(js.aln().size()); - if (js.aln().size()) - aln = js.aln()[0].second; - float total_fwd = 0, total_bwd = 0; - for (int i = po_first; i <= po_other; i++) - { - PhraseOrientation po = static_cast(i); - total_fwd += js.dcnt_fwd(po)+1; - total_bwd += js.dcnt_bwd(po)+1; - } - for (int i = po_first; i <= po_other; i++) - { - PhraseOrientation po = static_cast(i); - dfwd[i] = float(js.dcnt_fwd(po)+1)/total_fwd; - dbwd[i] = float(js.dcnt_bwd(po)+1)/total_bwd; - } - return *this; - } - - PhrasePair const& - PhrasePair:: - update(uint64_t const pid2, jstats const& js1, jstats const& js2) - { - p2 = pid2; - raw2 = js1.cnt2() + js2.cnt2(); - joint = js1.rcnt() + js2.rcnt(); - assert(js1.aln().size() || js2.aln().size()); - if (js1.aln().size()) - aln = js1.aln()[0].second; - else if (js2.aln().size()) - aln = js2.aln()[0].second; - for (int i = po_first; i < po_other; i++) - { - PhraseOrientation po = static_cast(i); - dfwd[i] = float(js1.dcnt_fwd(po) + js2.dcnt_fwd(po) + 1)/(sample1+po_other); - dbwd[i] = float(js1.dcnt_bwd(po) + js2.dcnt_bwd(po) + 1)/(sample1+po_other); - } - return *this; - } - - 
PhrasePair const& - PhrasePair:: - update(uint64_t const pid2, - size_t const raw2extra, - jstats const& js) - { - p2 = pid2; - raw2 = js.cnt2() + raw2extra; - joint = js.rcnt(); - assert(js.aln().size()); - if (js.aln().size()) - aln = js.aln()[0].second; - for (int i = po_first; i <= po_other; i++) - { - PhraseOrientation po = static_cast(i); - dfwd[i] = float(js.dcnt_fwd(po)+1)/(sample1+po_other); - dbwd[i] = float(js.dcnt_bwd(po)+1)/(sample1+po_other); - } - return *this; - } - - float - PhrasePair:: - eval(vector const& w) - { - assert(w.size() == this->fvals.size()); - this->score = 0; - for (size_t i = 0; i < w.size(); ++i) - this->score += w[i] * this->fvals[i]; - return this->score; - } - template<> sptr > > imBitext >:: @@ -371,7 +218,8 @@ namespace Moses uint32_t row,col; char c; while (ibuf >> row >> c >> col) { - assert(c == '-'); + UTIL_THROW_IF2(c != '-', "[" << HERE << "] " + << "Error in alignment information:\n" << a); binwrite(obuf,row); binwrite(obuf,col); } @@ -639,7 +487,6 @@ namespace Moses cout << string(90,'-') << endl; } - PhraseOrientation find_po_fwd(vector >& a1, vector >& a2, @@ -654,13 +501,13 @@ namespace Moses ushort ns1,ne1,ne2; if (!expand_phrase_pair(a1,a2,n2,b1,e1,ns1,ne1,ne2)) - { - return po_other; - } + return po_other; + if (ns1 >= e1) { for (ushort j = e1; j < ns1; ++j) - if (a1[j].size()) return po_jfwd; + if (a1[j].size()) + return po_jfwd; return po_mono; } else diff --git a/moses/TranslationModel/UG/mm/ug_bitext.h b/moses/TranslationModel/UG/mm/ug_bitext.h index 3972539737..4cb34c02d9 100644 --- a/moses/TranslationModel/UG/mm/ug_bitext.h +++ b/moses/TranslationModel/UG/mm/ug_bitext.h @@ -56,6 +56,7 @@ namespace Moses { class Mmsapt; namespace bitext { + template class Bitext; using namespace ugdiss; template class Bitext; @@ -120,6 +121,7 @@ namespace Moses { void add(float w, vector const& a, uint32_t const cnt2, uint32_t fwd_orient, uint32_t bwd_orient); void invalidate(); + void validate(); bool valid(); uint32_t 
dcnt_fwd(PhraseOrientation const idx) const; uint32_t dcnt_bwd(PhraseOrientation const idx) const; @@ -157,43 +159,6 @@ namespace Moses { uint32_t fwd_o, uint32_t bwd_o); }; - class - PhrasePair - { - public: - uint64_t p1, p2; - uint32_t raw1,raw2,sample1,sample2,good1,good2,joint; - vector fvals; - float dfwd[po_other+1]; - float dbwd[po_other+1]; - vector aln; - // float avlex12,avlex21; // average lexical probs (Moses std) - // float znlex1,znlex2; // zens-ney lexical smoothing - // float colex1,colex2; // based on raw lexical occurrences - float score; - PhrasePair(); - PhrasePair(PhrasePair const& o); - bool operator<(PhrasePair const& other) const; - bool operator>(PhrasePair const& other) const; - bool operator<=(PhrasePair const& other) const; - bool operator>=(PhrasePair const& other) const; - - void init(uint64_t const pid1, pstats const& ps, size_t const numfeats); - void init(uint64_t const pid1, pstats const& ps1, pstats const& ps2, - size_t const numfeats); - - PhrasePair const& - update(uint64_t const pid2, jstats const& js); - - PhrasePair const& - update(uint64_t const pid2, jstats const& js1, jstats const& js2); - - PhrasePair const& - update(uint64_t const pid2, size_t const raw2extra, jstats const& js); - - float eval(vector const& w); - }; - template class Bitext diff --git a/moses/TranslationModel/UG/mm/ug_im_ttrack.h b/moses/TranslationModel/UG/mm/ug_im_ttrack.h index 05066c922f..0c6e4afbf6 100644 --- a/moses/TranslationModel/UG/mm/ug_im_ttrack.h +++ b/moses/TranslationModel/UG/mm/ug_im_ttrack.h @@ -16,6 +16,9 @@ #include "tpt_tokenindex.h" #include "ug_ttrack_base.h" #include "tpt_tokenindex.h" +#include "util/exception.hh" +#include "moses/Util.h" + // #include "ug_vocab.h" // define the corpus buffer size (in sentences) and the @@ -49,6 +52,8 @@ namespace ugdiss typename boost::shared_ptr > append(typename boost::shared_ptr > const & crp, vector const & snt); + void m_check_token_count(); // debugging function + public: 
imTtrack(boost::shared_ptr > > const& d); @@ -69,6 +74,22 @@ namespace ugdiss }; + template + void + imTtrack:: + m_check_token_count() + { // sanity check + size_t check = 0; + BOOST_FOREACH(vector const& s, *myData) + check += s.size(); + UTIL_THROW_IF2(check != this->numToks, "[" << HERE << "]" + << " Wrong token count after appending sentence!" + << " Counted " << check << " but expected " + << this->numToks << " in a total of " << myData->size() + << " sentences."); + + } + template Token const* imTtrack:: @@ -111,9 +132,9 @@ namespace ugdiss template imTtrack:: imTtrack(istream& in, TokenIndex const& V, ostream* log = NULL) + : numToks(0) { myData.reset(new vector >()); - numToks = 0; string line,w; size_t linectr=0; boost::unordered_map H; @@ -135,6 +156,7 @@ namespace ugdiss template imTtrack:: imTtrack(size_t reserve) + : numToks(0) { myData.reset(new vector >()); if (reserve) myData->reserve(reserve); @@ -143,9 +165,9 @@ namespace ugdiss template imTtrack:: imTtrack(boost::shared_ptr > > const& d) + : numToks(0) { myData = d; - numToks = 0; BOOST_FOREACH(vector const& v, *d) numToks += v.size(); } @@ -171,6 +193,9 @@ namespace ugdiss shared_ptr > append(shared_ptr > const& crp, vector const & snt) { +#if 1 + if (crp) crp->m_check_token_count(); +#endif shared_ptr > ret; if (crp == NULL) { @@ -185,6 +210,11 @@ namespace ugdiss } else ret = crp; ret->myData->push_back(snt); + ret->numToks += snt.size(); + +#if 1 + ret->m_check_token_count(); +#endif return ret; } diff --git a/moses/TranslationModel/UG/mm/ug_lexical_phrase_scorer2.h b/moses/TranslationModel/UG/mm/ug_lexical_phrase_scorer2.h index 558b5a7fa9..b7e3592233 100644 --- a/moses/TranslationModel/UG/mm/ug_lexical_phrase_scorer2.h +++ b/moses/TranslationModel/UG/mm/ug_lexical_phrase_scorer2.h @@ -27,7 +27,6 @@ namespace ugdiss typedef mm2dTable table_t; table_t COOC; void open(string const& fname); - template void score(TKN const* snt1, size_t const s1, size_t const e1, @@ -104,7 +103,19 @@ namespace 
ugdiss if (COOC.m1(s) == 0 || COOC.m2(t) == 0) return 1.0; UTIL_THROW_IF2(alpha < 0,"At " << __FILE__ << ":" << __LINE__ << ": alpha parameter must be >= 0"); - return float(COOC[s][t]+alpha)/(COOC.m1(s)+alpha); + float ret = COOC[s][t]+alpha; + ret = (ret?ret:1.)/(COOC.m1(s)+alpha); + UTIL_THROW_IF2(ret <= 0 || ret > 1, "At " << __FILE__ << ":" << __LINE__ + << ": result not > 0 and <= 1. alpha = " << alpha << "; " + << COOC[s][t] << "/" << COOC.m1(s)); + +#if 0 + cerr << "[" << s << "," << t << "] " + << COOC.m1(s) << "/" + << COOC[s][t] << "/" + << COOC.m2(t) << endl; +#endif + return ret; } template @@ -115,7 +126,11 @@ namespace ugdiss if (COOC.m1(s) == 0 || COOC.m2(t) == 0) return 1.0; UTIL_THROW_IF2(alpha < 0,"At " << __FILE__ << ":" << __LINE__ << ": alpha parameter must be >= 0"); - return float(COOC[s][t]+alpha)/(COOC.m2(t)+alpha); + float ret = float(COOC[s][t]+alpha); + ret = (ret?ret:1.)/(COOC.m2(t)+alpha); + UTIL_THROW_IF2(ret <= 0 || ret > 1, "At " << __FILE__ << ":" << __LINE__ + << ": result not > 0 and <= 1."); + return ret; } template diff --git a/moses/TranslationModel/UG/mm/ug_phrasepair.cc b/moses/TranslationModel/UG/mm/ug_phrasepair.cc new file mode 100644 index 0000000000..6373f84688 --- /dev/null +++ b/moses/TranslationModel/UG/mm/ug_phrasepair.cc @@ -0,0 +1,97 @@ +#include "ug_phrasepair.h" +namespace Moses { + namespace bitext + { + +#if 0 + void + PhrasePair:: + init() + { + p1 = p2 = raw1 = raw2 = sample1 = sample2 = good1 = good2 = joint = 0; + } + + void + PhrasePair:: + init(uint64_t const pid1, + pstats const& ps1, + pstats const& ps2, + size_t const numfeats) + { + p1 = pid1; + raw1 = ps1.raw_cnt + ps2.raw_cnt; + sample1 = ps1.sample_cnt + ps2.sample_cnt; + sample2 = 0; + good1 = ps1.good + ps2.good; + good2 = 0; + joint = 0; + fvals.resize(numfeats); + } + + PhrasePair const& + PhrasePair:: + update(uint64_t const pid2, jstats const& js1, jstats const& js2) + { + p2 = pid2; + raw2 = js1.cnt2() + js2.cnt2(); + joint = js1.rcnt() + 
js2.rcnt(); + assert(js1.aln().size() || js2.aln().size()); + if (js1.aln().size()) + aln = js1.aln()[0].second; + else if (js2.aln().size()) + aln = js2.aln()[0].second; + for (int i = po_first; i < po_other; i++) + { + PhraseOrientation po = static_cast(i); + dfwd[i] = float(js1.dcnt_fwd(po) + js2.dcnt_fwd(po) + 1)/(sample1+po_other); + dbwd[i] = float(js1.dcnt_bwd(po) + js2.dcnt_bwd(po) + 1)/(sample1+po_other); + } + return *this; + } + + PhrasePair const& + PhrasePair:: + update(uint64_t const pid2, size_t r2) + { + p2 = pid2; + raw2 = r2; + joint = 0; + return *this; + } + + + PhrasePair const& + PhrasePair:: + update(uint64_t const pid2, + size_t const raw2extra, + jstats const& js) + { + p2 = pid2; + raw2 = js.cnt2() + raw2extra; + joint = js.rcnt(); + assert(js.aln().size()); + if (js.aln().size()) + aln = js.aln()[0].second; + for (int i = po_first; i <= po_other; i++) + { + PhraseOrientation po = static_cast(i); + dfwd[i] = float(js.dcnt_fwd(po)+1)/(sample1+po_other); + dbwd[i] = float(js.dcnt_bwd(po)+1)/(sample1+po_other); + } + return *this; + } + + float + PhrasePair:: + eval(vector const& w) + { + assert(w.size() == this->fvals.size()); + this->score = 0; + for (size_t i = 0; i < w.size(); ++i) + this->score += w[i] * this->fvals[i]; + return this->score; + } +#endif + } // namespace bitext +} // namespace Moses + diff --git a/moses/TranslationModel/UG/mm/ug_phrasepair.h b/moses/TranslationModel/UG/mm/ug_phrasepair.h new file mode 100644 index 0000000000..8cd43dc187 --- /dev/null +++ b/moses/TranslationModel/UG/mm/ug_phrasepair.h @@ -0,0 +1,243 @@ +//-*- c++ -*- +#pragma once +#include "ug_bitext.h" + +using namespace ugdiss; +using namespace std; + +namespace Moses { + namespace bitext + { + + template + string + toString(TokenIndex const& V, Token const* x, size_t const len) + { + if (!len) return ""; + UTIL_THROW_IF2(!x, HERE << ": Unexpected end of phrase!"); + ostringstream buf; + buf << V[x->id()]; + size_t i = 1; + for (x = x->next(); x && i < 
len; ++i, x = x->next()) + buf << " " << V[x->id()]; + UTIL_THROW_IF2(i != len, HERE << ": Unexpected end of phrase!"); + return buf.str(); + } + + template + class + PhrasePair + { + public: + Token const* start1; + Token const* start2; + uint32_t len1; + uint32_t len2; + // uint64_t p1, p2; + uint32_t raw1,raw2,sample1,sample2,good1,good2,joint; + vector fvals; + float dfwd[po_other+1]; // distortion counts // counts or probs? + float dbwd[po_other+1]; // distortion counts + vector aln; + float score; + PhrasePair() { }; + PhrasePair(PhrasePair const& o); + + PhrasePair const& operator+=(PhrasePair const& other); + + bool operator<(PhrasePair const& other) const; + bool operator>(PhrasePair const& other) const; + bool operator<=(PhrasePair const& other) const; + bool operator>=(PhrasePair const& other) const; + + void init(); + void init(Token const* x, uint32_t const len, + pstats const* ps = NULL, size_t const numfeats=0); + + // void init(uint64_t const pid1, pstats const& ps, size_t const numfeats); + // void init(uint64_t const pid1, pstats const& ps1, pstats const& ps2, + // size_t const numfeats); + + // PhrasePair const& + // update(uint64_t const pid2, size_t r2 = 0); + + PhrasePair const& + update(Token const* x, uint32_t const len, jstats const& js); + + // PhrasePair const& + // update(uint64_t const pid2, jstats const& js1, jstats const& js2); + + // PhrasePair const& + // update(uint64_t const pid2, size_t const raw2extra, jstats const& js); + + // float + // eval(vector const& w); + + class SortByTargetIdSeq + { + public: + int cmp(PhrasePair const& a, PhrasePair const& b) const; + bool operator()(PhrasePair const& a, PhrasePair const& b) const; + }; + }; + + template + void + PhrasePair:: + init(Token const* x, uint32_t const len, + pstats const* ps, size_t const numfeats) + { + start1 = x; len1 = len; + // p1 = pid1; + // p2 = 0; + if (ps) + { + raw1 = ps->raw_cnt; + sample1 = ps->sample_cnt; + good1 = ps->good; + } + else raw1 = sample1 = good1 
= 0; + joint = 0; + good2 = 0; + sample2 = 0; + raw2 = 0; + fvals.resize(numfeats); + } + + template + PhrasePair const& + PhrasePair:: + update(Token const* x, uint32_t const len, jstats const& js) + { + // p2 = pid2; + start2 = x; len2 = len; + raw2 = js.cnt2(); + joint = js.rcnt(); + assert(js.aln().size()); + if (js.aln().size()) + aln = js.aln()[0].second; + float total_fwd = 0, total_bwd = 0; + for (int i = po_first; i <= po_other; i++) + { + PhraseOrientation po = static_cast(i); + total_fwd += js.dcnt_fwd(po)+1; + total_bwd += js.dcnt_bwd(po)+1; + } + + // should we do that here or leave the raw counts? + for (int i = po_first; i <= po_other; i++) + { + PhraseOrientation po = static_cast(i); + dfwd[i] = float(js.dcnt_fwd(po)+1)/total_fwd; + dbwd[i] = float(js.dcnt_bwd(po)+1)/total_bwd; + } + + return *this; + } + + template + bool + PhrasePair:: + operator<(PhrasePair const& other) const + { return this->score < other.score; } + + template + bool + PhrasePair:: + operator>(PhrasePair const& other) const + { return this->score > other.score; } + + template + bool + PhrasePair:: + operator<=(PhrasePair const& other) const + { return this->score <= other.score; } + + template + bool + PhrasePair:: + operator>=(PhrasePair const& other) const + { return this->score >= other.score; } + + template + PhrasePair const& + PhrasePair:: + operator+=(PhrasePair const& o) + { + raw1 += o.raw1; + raw2 += o.raw2; + sample1 += o.sample1; + sample2 += o.sample2; + good1 += o.good1; + good2 += o.good2; + joint += o.joint; + return *this; + } + + template + PhrasePair:: + PhrasePair(PhrasePair const& o) + : start1(o.start1) + , start2(o.start2) + , len1(o.len1) + , len2(o.len2) + , raw1(o.raw1) + , raw2(o.raw2) + , sample1(o.sample1) + , sample2(o.sample2) + , good1(o.good1) + , good2(o.good2) + , joint(o.joint) + , fvals(o.fvals) + , aln(o.aln) + , score(o.score) + { + for (size_t i = 0; i <= po_other; ++i) + { + dfwd[i] = o.dfwd[i]; + dbwd[i] = o.dbwd[i]; + } + } + + 
template + int + PhrasePair:: + SortByTargetIdSeq:: + cmp(PhrasePair const& a, PhrasePair const& b) const + { + size_t i = 0; + Token const* x = a.start2; + Token const* y = b.start2; + while (i < a.len2 && i < b.len2 && x->id() == y->id()) + { + x = x->next(); + y = y->next(); + ++i; + } + if (i == a.len2 && i == b.len2) return 0; + if (i == a.len2) return -1; + if (i == b.len2) return 1; + return x->id() < y->id() ? -1 : 1; + } + + template + bool + PhrasePair:: + SortByTargetIdSeq:: + operator()(PhrasePair const& a, PhrasePair const& b) const + { + return this->cmp(a,b) < 0; + } + + template + void + PhrasePair:: + init() + { + len1 = len2 = raw1 = raw2 = sample1 = sample2 = good1 = good2 = joint = 0; + start1 = start2 = NULL; + } + + + } // namespace bitext +} // namespace Moses diff --git a/moses/TranslationModel/UG/mm/ug_tsa_tree_iterator.h b/moses/TranslationModel/UG/mm/ug_tsa_tree_iterator.h index 14bf6cdadb..ab7f96bf0b 100644 --- a/moses/TranslationModel/UG/mm/ug_tsa_tree_iterator.h +++ b/moses/TranslationModel/UG/mm/ug_tsa_tree_iterator.h @@ -7,6 +7,8 @@ #include "ug_typedefs.h" #include "tpt_tokenindex.h" #include +#include "util/exception.hh" +#include "moses/Util.h" //#include // #include "ug_bv_iter.h" @@ -60,8 +62,13 @@ namespace ugdiss // TSA_tree_iterator(TSA_tree_iterator const& other); TSA_tree_iterator(TSA const* s); + TSA_tree_iterator(TSA const* s, TSA_tree_iterator const& other); TSA_tree_iterator(TSA const* r, id_type const* s, size_t const len); // TSA_tree_iterator(TSA const* s, Token const& t); + TSA_tree_iterator(TSA const* s, + Token const* kstart, + size_t const len, + bool full_match_only=true); TSA_tree_iterator(TSA const* s, Token const* kstart, Token const* kend, @@ -150,9 +157,12 @@ namespace ugdiss double approxOccurrenceCount(int p=-1) const { assert(root); + if (p < 0) p += lower.size(); double ret = arrayByteSpanSize(p)/root->aveIndexEntrySize(); - assert(ret < root->corpus->numTokens()); if (ret < 25) ret = rawCnt(p); + 
UTIL_THROW_IF2(ret > root->corpus->numTokens(), "[" << HERE << "] " + << "Word count mismatch."); + assert(ret <= root->corpus->numTokens()); return ret; } @@ -318,6 +328,18 @@ namespace ugdiss : root(s) {}; + template + TSA_tree_iterator:: + TSA_tree_iterator(TSA const* s, TSA_tree_iterator const& other) + : root(s) + { + Token const* x = other.getToken(0); + for (size_t i = 0; i < other.size() && this->extend(x->id()); ++i) + x = x->next(); + }; + + + template TSA_tree_iterator:: TSA_tree_iterator @@ -382,6 +404,25 @@ namespace ugdiss #endif + template + TSA_tree_iterator:: + TSA_tree_iterator(TSA const* s, Token const* kstart, + size_t const len, bool full_match_only) + : root(s) + { + if (!root) return; + size_t i = 0; + for (; i < len && kstart && extend(*kstart); ++i) + kstart = kstart->next(); + if (full_match_only && i != len) + { + lower.clear(); + upper.clear(); + } + }; + + // DEPRECATED: DO NOT USE. Use the one that takes the length + // instead of kend. template TSA_tree_iterator:: TSA_tree_iterator(TSA const* s, Token const* kstart, @@ -561,8 +602,7 @@ namespace ugdiss TSA_tree_iterator:: rawCnt(int p) const { - if (p < 0) - p = lower.size()+p; + if (p < 0) p += lower.size(); assert(p>=0); if (lower.size() == 0) return root->getCorpusSize(); return root->rawCnt(lower[p],upper[p]); diff --git a/moses/TranslationModel/UG/mmsapt.cpp b/moses/TranslationModel/UG/mmsapt.cpp index dc99454728..596fec4e6c 100644 --- a/moses/TranslationModel/UG/mmsapt.cpp +++ b/moses/TranslationModel/UG/mmsapt.cpp @@ -1,13 +1,38 @@ #include "mmsapt.h" #include +#include #include #include +#include "moses/TranslationModel/UG/mm/ug_phrasepair.h" +#include "util/exception.hh" +#include namespace Moses { using namespace bitext; using namespace std; using namespace boost; + + + // uint64_t + // pack_phrasekey(uint64_t const shard_id, uint64_t const snt_id, + // uint64_t const offset, uint64_t const len) + // { + // uint64_t one = 1; + // // 8 bits - 256 shards + // // 13 bits - max 
offset + // // 11 bits - max len + // // 32 bits - max sentence id + // UTIL_TRHOW_IF2(shard_id >= 256, "[" << HERE << "] " + // << "Sentence ID exceeds limit."); + // UTIL_THROW_IF2(snt_id >= 4294967296, "[" << HERE << "] " + // << "Sentence ID exceeds limit."); + // UTIL_TRHOW_IF2(offset >= 8192, "[" << HERE << "]" + // << "Phrase offset exceeds limit."); + // UTIL_TRHOW_IF2(offset >= 2048, "[" << HERE << "]" + // << "Phrase length exceeds limit."); + // return ((shard_id<<56)+(snt_id<<24)+(offset<<11)+len); + // } void fillIdSeq(Phrase const& mophrase, size_t const ifactor, @@ -23,7 +48,7 @@ namespace Moses void - parseLine(string const& line, map & params) + parseLine(string const& line, map & param) { char_separator sep("; "); tokenizer > tokens(line,sep); @@ -32,9 +57,14 @@ namespace Moses size_t i = t.find_first_not_of(" ="); size_t j = t.find_first_of(" =",i+1); size_t k = t.find_first_not_of(" =",j+1); + UTIL_THROW_IF2(i == string::npos || k == string::npos, + "[" << HERE << "] " + << "Parameter specification error near '" + << t << "' in moses ini line\n" + << line); assert(i != string::npos); assert(k != string::npos); - params[t.substr(i,j)] = t.substr(k); + param[t.substr(i,j)] = t.substr(k); } } @@ -57,13 +87,13 @@ namespace Moses Mmsapt:: Mmsapt(string const& line) : PhraseDictionary(line) - , m_lex_alpha(1.0) - , withLogCountFeatures(false) - , withCoherence(true) - , m_pfwd_features("g") - , m_pbwd_features("g") - , withPbwd(true) - , poolCounts(true) + // , m_lex_alpha(1.0) + // , withLogCountFeatures(false) + // , withCoherence(true) + // , m_pfwd_features("g") + // , m_pbwd_features("g") + // , withPbwd(true) + // , poolCounts(true) , ofactor(1,0) , m_tpc_ctr(0) { @@ -92,83 +122,127 @@ namespace Moses } } + void + Mmsapt:: + register_ff(sptr const& ff, vector > & registry) + { + registry.push_back(ff); + ff->setIndex(m_feature_names.size()); + for (int i = 0; i < ff->fcnt(); ++i) + { + m_feature_names.push_back(ff->fname(i)); + 
m_is_logval.push_back(ff->isLogVal(i)); + m_is_integer.push_back(ff->isIntegerValued(i)); + } + } + + bool + Mmsapt:: + isLogVal(int i) const { return m_is_logval.at(i); } + + bool + Mmsapt:: + isInteger(int i) const { return m_is_integer.at(i); } + void Mmsapt:: init(string const& line) { map::const_iterator m; - map param; - parseLine(line,param); + parseLine(line,this->param); + + this->m_numScoreComponents = atoi(param["num-features"].c_str()); m = param.find("config"); if (m != param.end()) read_config_file(m->second,param); - - bname = param["base"]; + + bname = param["base"]; L1 = param["L1"]; L2 = param["L2"]; - assert(bname.size()); - assert(L1.size()); - assert(L2.size()); - - m = param.find("pfwd-denom"); - m_pfwd_denom = m != param.end() ? m->second[0] : 's'; - - m = param.find("smooth"); - m_lbop_parameter = m != param.end() ? atof(m->second.c_str()) : .05; - m = param.find("max-samples"); - m_default_sample_size = m != param.end() ? atoi(m->second.c_str()) : 1000; + UTIL_THROW_IF2(bname.size() == 0, "Missing corpus base name at " << HERE); + UTIL_THROW_IF2(L1.size() == 0, "Missing L1 tag at " << HERE); + UTIL_THROW_IF2(L2.size() == 0, "Missing L2 tag at " << HERE); - if ((m = param.find("logcnt-features")) != param.end()) - withLogCountFeatures = m->second != "0"; - - if ((m = param.find("coh")) != param.end()) - withCoherence = m->second != "0"; - - if ((m = param.find("pfwd")) != param.end()) - m_pfwd_features = (m->second == "0" ? "" : m->second); - - if (m_pfwd_features == "1") // legacy; deprecated - m_pfwd_features[0] = m_pfwd_denom; + // set defaults for all parameters if not specified so far + pair dflt("input-factor","0"); + input_factor = atoi(param.insert(dflt).first->second.c_str()); + // shouldn't that be a string? - if ((m = param.find("pbwd")) != param.end()) - m_pbwd_features = (m->second == "0" ? 
"" : m->second); + dflt = pair ("smooth",".01"); + m_lbop_conf = atof(param.insert(dflt).first->second.c_str()); - if (m_pbwd_features == "1") - m_pbwd_features = "r"; // lecagy; deprecated + dflt = pair ("lexalpha","0"); + m_lex_alpha = atof(param.insert(dflt).first->second.c_str()); - if ((m = param.find("lexalpha")) != param.end()) - m_lex_alpha = atof(m->second.c_str()); + dflt = pair ("sample","1000"); + m_default_sample_size = atoi(param.insert(dflt).first->second.c_str()); - m = param.find("workers"); - m_workers = m != param.end() ? atoi(m->second.c_str()) : 8; + dflt = pair("workers","8"); + m_workers = atoi(param.insert(dflt).first->second.c_str()); m_workers = min(m_workers,24UL); - if ((m = param.find("limit")) != param.end()) - m_tableLimit = atoi(m->second.c_str()); + dflt = pair("limit","20"); + m_tableLimit = atoi(param.insert(dflt).first->second.c_str()); - m = param.find("cache-size"); - m_history.reserve(m != param.end()?max(1000,atoi(m->second.c_str())):10000); + dflt = pair("cache","10000"); + size_t hsize = max(1000,atoi(param.insert(dflt).first->second.c_str())); + m_history.reserve(hsize); // in plain language: cache size is at least 1000, and 10,000 by default // this cache keeps track of the most frequently used target phrase collections // even when not actively in use - - this->m_numScoreComponents = atoi(param["num-features"].c_str()); - m = param.find("ifactor"); - input_factor = m != param.end() ? 
atoi(m->second.c_str()) : 0; + // Feature functions are initialized in function Load(); + param.insert(pair("pfwd", "g")); + param.insert(pair("pbwd", "g")); + param.insert(pair("logcnt", "0")); + param.insert(pair("coh", "0")); + param.insert(pair("rare", "1")); + param.insert(pair("prov", "1")); poolCounts = true; if ((m = param.find("extra")) != param.end()) extra_data = m->second; + // check for unknown parameters + vector known_parameters; known_parameters.reserve(50); + known_parameters.push_back("L1"); + known_parameters.push_back("L2"); + known_parameters.push_back("Mmsapt"); + known_parameters.push_back("base"); + known_parameters.push_back("cache"); + known_parameters.push_back("coh"); + known_parameters.push_back("config"); + known_parameters.push_back("extra"); + known_parameters.push_back("input-factor"); + known_parameters.push_back("lexalpha"); + known_parameters.push_back("limit"); + known_parameters.push_back("logcnt"); + known_parameters.push_back("name"); + known_parameters.push_back("num-features"); + known_parameters.push_back("output-factor"); + known_parameters.push_back("pbwd"); + known_parameters.push_back("pfwd"); + known_parameters.push_back("prov"); + known_parameters.push_back("rare"); + known_parameters.push_back("sample"); + known_parameters.push_back("smooth"); + known_parameters.push_back("unal"); + known_parameters.push_back("workers"); + for (map::iterator m = param.begin(); m != param.end(); ++m) + { + UTIL_THROW_IF2(!binary_search(known_parameters.begin(), + known_parameters.end(), m->first), + HERE << ": Unknown parameter specification for Mmsapt: " + << m->first); + } } void Mmsapt:: - load_extra_data(string bname) + load_extra_data(string bname, bool locking = true) { // TO DO: ADD CHECKS FOR ROBUSTNESS // - file existence? 
@@ -186,122 +260,120 @@ namespace Moses while(getline(in2,line)) text2.push_back(line); while(getline(ina,line)) symal.push_back(line); - lock_guard guard(this->lock); + boost::scoped_ptr > guard; + if (locking) guard.reset(new lock_guard(this->lock)); btdyn = btdyn->add(text1,text2,symal); assert(btdyn); // cerr << "Loaded " << btdyn->T1->size() << " sentence pairs" << endl; } - size_t + template + void Mmsapt:: - add_corpus_specific_features - (vector >& ffvec, size_t num_feats) + check_ff(string const ffname, vector >* registry) { - float const lbop = m_lbop_parameter; // just for code readability below - // for the time being, we assume that all phrase probability features - // use the same confidence parameter for lower-bound-estimation - for (size_t i = 0; i < m_pfwd_features.size(); ++i) - { - UTIL_THROW_IF2(m_pfwd_features[i] != 'g' && - m_pfwd_features[i] != 'r' && - m_pfwd_features[i] != 's', - "Can't handle pfwd feature type '" - << m_pfwd_features[i] << "'."); - sptr > ff(new PScorePfwd()); - size_t k = num_feats; - num_feats = ff->init(num_feats,lbop,m_pfwd_features[i]); - for (;k < num_feats; ++k) m_feature_names.push_back(ff->fname(k)); - ffvec.push_back(ff); + string const& spec = param[ffname]; + if (spec == "" || spec == "0") return; + if (registry) + { + sptr ff(new fftype(spec)); + register_ff(ff, *registry); } - - for (size_t i = 0; i < m_pbwd_features.size(); ++i) - { - UTIL_THROW_IF2(m_pbwd_features[i] != 'g' && - m_pbwd_features[i] != 'r' && - m_pbwd_features[i] != 's', - "Can't handle pbwd feature type '" - << m_pbwd_features[i] << "'."); - sptr > ff(new PScorePbwd()); - size_t k = num_feats; - num_feats = ff->init(num_feats,lbop,m_pbwd_features[i]); - for (;k < num_feats; ++k) m_feature_names.push_back(ff->fname(k)); - ffvec.push_back(ff); + else if (spec[spec.size()-1] == '+') // corpus specific + { + sptr ff(new fftype(spec)); + register_ff(ff, m_active_ff_fix); + ff.reset(new fftype(spec)); + register_ff(ff, m_active_ff_dyn); } - - // 
if (withPbwd) - // { - // sptr > ff(new PScorePbwd()); - // size_t k = num_feats; - // num_feats = ff->init(num_feats,lbop); - // for (; k < num_feats; ++k) m_feature_names.push_back(ff->fname(k)); - // ffvec.push_back(ff); - // } - - if (withLogCountFeatures) + else { - sptr > ff(new PScoreLogCounts()); - size_t k = num_feats; - num_feats = ff->init(num_feats); - for (; k < num_feats; ++k) m_feature_names.push_back(ff->fname(k)); - ffvec.push_back(ff); + sptr ff(new fftype(spec)); + register_ff(ff, m_active_ff_common); } + } - return num_feats; + template + void + Mmsapt:: + check_ff(string const ffname, float const xtra, vector >* registry) + { + string const& spec = param[ffname]; + if (spec == "" || spec == "0") return; + if (registry) + { + sptr ff(new fftype(xtra,spec)); + register_ff(ff, *registry); + } + else if (spec[spec.size()-1] == '+') // corpus specific + { + sptr ff(new fftype(xtra,spec)); + register_ff(ff, m_active_ff_fix); + ff.reset(new fftype(xtra,spec)); + register_ff(ff, m_active_ff_dyn); + } + else + { + sptr ff(new fftype(xtra,spec)); + register_ff(ff, m_active_ff_common); + } } + // void + // Mmsapt:: + // add_corpus_specific_features(vector >& registry) + // { + // check_ff >("pbwd",m_lbop_conf,registry); + // check_ff >("logcnt",registry); + // } + void Mmsapt:: Load() { + lock_guard guard(this->lock); + + // can load only once + // UTIL_THROW_IF2(shards.size(),"Mmsapt is already loaded at " << HERE); + + // lexical scores + string lexfile = bname + L1 + "-" + L2 + ".lex"; + sptr > ff(new PScoreLex1(param["lex_alpha"],lexfile)); + register_ff(ff,m_active_ff_common); + + // these are always computed on pooled data + check_ff > ("rare", &m_active_ff_common); + check_ff >("unal", &m_active_ff_common); + check_ff >("coh", &m_active_ff_common); + + // for these ones either way is possible (specification ends with '+' + // if corpus-specific + check_ff >("pfwd", m_lbop_conf); + check_ff >("pbwd", m_lbop_conf); + check_ff >("logcnt"); + + // 
These are always corpus-specific + check_ff >("prov", &m_active_ff_fix); + check_ff >("prov", &m_active_ff_dyn); + + UTIL_THROW_IF2(this->m_feature_names.size() != this->m_numScoreComponents, + "At " << HERE << ": number of feature values provided by " + << "Phrase table (" << this->m_feature_names.size() + << ") does not match number specified in Moses config file (" + << this->m_numScoreComponents << ")!\n";); + + // Load corpora. For the time being, we can have one memory-mapped static + // corpus and one in-memory dynamic corpus + // sptr btfix(new mmbitext()); btfix.num_workers = this->m_workers; btfix.open(bname, L1, L2); btfix.setDefaultSampleSize(m_default_sample_size); + // shards.push_back(btfix); - size_t num_feats = 0; - - // lexical scores are currently always active - sptr > ff(new PScoreLex(m_lex_alpha)); - size_t k = num_feats; - num_feats = ff->init(num_feats, bname + L1 + "-" + L2 + ".lex"); - for (; k < num_feats; ++k) m_feature_names.push_back(ff->fname(k)); - m_active_ff_common.push_back(ff); - - if (withCoherence) - { - sptr > ff(new PScoreCoherence()); - size_t k = num_feats; - num_feats = ff->init(num_feats); - for (; k < num_feats; ++k) m_feature_names.push_back(ff->fname(k)); - m_active_ff_common.push_back(ff); - } - - num_feats = add_corpus_specific_features(m_active_ff_fix,num_feats); - // cerr << num_feats << "/" << this->m_numScoreComponents - // << " at " << __FILE__ << ":" << __LINE__ << endl; - poolCounts = poolCounts && num_feats == this->m_numScoreComponents; - if (!poolCounts) - num_feats = add_corpus_specific_features(m_active_ff_dyn, num_feats); - -#if 0 - cerr << "MMSAPT provides " << num_feats << " features at " - << __FILE__ << ":" << __LINE__ << endl; - BOOST_FOREACH(string const& fname, m_feature_names) - cerr << fname << endl; -#endif - UTIL_THROW_IF2(num_feats != this->m_numScoreComponents, - "At " << __FILE__ << ":" << __LINE__ - << ": number of feature values provided by Phrase table (" - << num_feats << ") does not 
match number specified in " - << "Moses config file (" << this->m_numScoreComponents - << ")!\n";); - - - btdyn.reset(new imBitext(btfix.V1, btfix.V2,m_default_sample_size)); + btdyn.reset(new imbitext(btfix.V1, btfix.V2, m_default_sample_size)); btdyn->num_workers = this->m_workers; if (extra_data.size()) - { - load_extra_data(extra_data); - } + load_extra_data(extra_data,false); #if 0 // currently not used @@ -330,258 +402,345 @@ namespace Moses TargetPhrase* Mmsapt:: - createTargetPhrase(Phrase const& src, - Bitext const& bt, - PhrasePair const& pp) const + mkTPhrase(Phrase const& src, + PhrasePair* fix, + PhrasePair* dyn, + sptr > const& dynbt) const { - Word w; uint32_t sid,off,len; + UTIL_THROW_IF2(!fix && !dyn, HERE << + ": Can't create target phrase from nothing."); + vector fvals(this->m_numScoreComponents); + PhrasePair pool = fix ? *fix : *dyn; + if (fix) + { + BOOST_FOREACH(sptr const& ff, m_active_ff_fix) + (*ff)(btfix, *fix, &fvals); + } + if (dyn) + { + BOOST_FOREACH(sptr const& ff, m_active_ff_dyn) + (*ff)(*dynbt, *dyn, &fvals); + } + + if (fix && dyn) { pool += *dyn; } + else if (fix) + { + PhrasePair zilch; zilch.init(); + TSA::tree_iterator m(dynbt->I2.get(), fix->start2, fix->len2); + if (m.size() == fix->len2) + zilch.raw2 = m.approxOccurrenceCount(); + pool += zilch; + BOOST_FOREACH(sptr const& ff, m_active_ff_dyn) + (*ff)(*dynbt, ff->allowPooling() ? pool : zilch, &fvals); + } + else if (dyn) + { + PhrasePair zilch; zilch.init(); + TSA::tree_iterator m(btfix.I2.get(), dyn->start2, dyn->len2); + if (m.size() == dyn->len2) + zilch.raw2 = m.approxOccurrenceCount(); + pool += zilch; + BOOST_FOREACH(sptr const& ff, m_active_ff_fix) + (*ff)(*dynbt, ff->allowPooling() ? 
pool : zilch, &fvals); + } + if (fix) + { + BOOST_FOREACH(sptr const& ff, m_active_ff_common) + (*ff)(btfix, pool, &fvals); + } + else + { + BOOST_FOREACH(sptr const& ff, m_active_ff_common) + (*ff)(*dynbt, pool, &fvals); + } TargetPhrase* tp = new TargetPhrase(); - parse_pid(pp.p2, sid, off, len); - Token const* x = bt.T2->sntStart(sid) + off; - for (uint32_t k = 0; k < len; ++k) + Token const* x = fix ? fix->start2 : dyn->start2; + uint32_t len = fix ? fix->len2 : dyn->len2; + for (uint32_t k = 0; k < len; ++k, x = x->next()) { - // cerr << (*bt.V2)[x[k].id()] << " at " << __FILE__ << ":" << __LINE__ << endl; - StringPiece wrd = (*bt.V2)[x[k].id()]; - // if ((off+len) > bt.T2->sntLen(sid)) - // cerr << off << ";" << len << " " << bt.T2->sntLen(sid) << endl; - assert(off+len <= bt.T2->sntLen(sid)); - w.CreateFromString(Output,ofactor,wrd,false); + StringPiece wrd = (*(btfix.V2))[x->id()]; + Word w; w.CreateFromString(Output,ofactor,wrd,false); tp->AddWord(w); } - tp->GetScoreBreakdown().Assign(this, pp.fvals); + tp->GetScoreBreakdown().Assign(this, fvals); tp->Evaluate(src); return tp; } - // process phrase stats from a single parallel corpus - void - Mmsapt:: - process_pstats - (Phrase const& src, - uint64_t const pid1, - pstats const& stats, - Bitext const & bt, - TargetPhraseCollection* tpcoll - ) const - { - PhrasePair pp; - pp.init(pid1, stats, this->m_numScoreComponents); - pstats::trg_map_t::const_iterator t; - for (t = stats.trg.begin(); t != stats.trg.end(); ++t) - { - pp.update(t->first,t->second); - BOOST_FOREACH(sptr const& ff, m_active_ff_fix) - (*ff)(bt,pp); - BOOST_FOREACH(sptr const& ff, m_active_ff_common) - (*ff)(bt,pp); - tpcoll->Add(createTargetPhrase(src,bt,pp)); - } - } + // TargetPhrase* + // Mmsapt:: + // mkTPhrase(Phrase const& src, + // Bitext const& bt, + // PhrasePair const& pp) const + // { + // Word w; uint32_t sid,off,len; + // TargetPhrase* tp = new TargetPhrase(); + // parse_pid(pp.p2, sid, off, len); + // Token const* x = 
bt.T2->sntStart(sid) + off; + // for (uint32_t k = 0; k < len; ++k) + // { + // // cerr << (*bt.V2)[x[k].id()] << " at " << __FILE__ << ":" << __LINE__ << endl; + // StringPiece wrd = (*bt.V2)[x[k].id()]; + // // if ((off+len) > bt.T2->sntLen(sid)) + // // cerr << off << ";" << len << " " << bt.T2->sntLen(sid) << endl; + // assert(off+len <= bt.T2->sntLen(sid)); + // w.CreateFromString(Output,ofactor,wrd,false); + // tp->AddWord(w); + // } + // tp->GetScoreBreakdown().Assign(this, pp.fvals); + // tp->Evaluate(src); + // return tp; + // } + + // // process phrase stats from a single parallel corpus + // void + // Mmsapt:: + // process_pstats + // (Phrase const& src, + // uint64_t const pid1, + // pstats const& stats, + // Bitext const & bt, + // TargetPhraseCollection* tpcoll + // ) const + // { + // PhrasePair pp; + // pp.init(pid1, stats, this->m_numScoreComponents); + // pstats::trg_map_t::const_iterator t; + // for (t = stats.trg.begin(); t != stats.trg.end(); ++t) + // { + // pp.update(t->first,t->second); + // BOOST_FOREACH(sptr const& ff, m_active_ff_fix) + // (*ff)(bt,pp); + // BOOST_FOREACH(sptr const& ff, m_active_ff_common) + // (*ff)(bt,pp); + // tpcoll->Add(mkTPhrase(src,bt,pp)); + // } + // } + + // void + // Mmsapt:: + // ScorePPfix(PhrasePair& pp) const + // { + // BOOST_FOREACH(sptr const& ff, m_active_ff_fix) + // (*ff)(btfix,pp); + // BOOST_FOREACH(sptr const& ff, m_active_ff_common) + // (*ff)(btfix,pp); + // } + +// // process phrase stats from a single parallel corpus +// bool +// Mmsapt:: +// pool_pstats(Phrase const& src, +// uint64_t const pid1a, +// pstats * statsa, +// Bitext const & bta, +// uint64_t const pid1b, +// pstats const* statsb, +// Bitext const & btb, +// TargetPhraseCollection* tpcoll) const +// { +// PhrasePair pp; +// if (statsa && statsb) +// pp.init(pid1b, *statsa, *statsb, this->m_numScoreComponents); +// else if (statsa) +// pp.init(pid1a, *statsa, this->m_numScoreComponents); +// else if (statsb) +// pp.init(pid1b, 
*statsb, this->m_numScoreComponents); +// else return false; // throw "no stats for pooling available!"; + +// pstats::trg_map_t::const_iterator b; +// pstats::trg_map_t::iterator a; +// if (statsb) +// { +// for (b = statsb->trg.begin(); b != statsb->trg.end(); ++b) +// { +// uint32_t sid,off,len; +// parse_pid(b->first, sid, off, len); +// Token const* x = btb.T2->sntStart(sid) + off; +// TSA::tree_iterator m(bta.I2.get(),x,x+len); +// if (m.size() == len) +// { +// ; +// if (statsa && ((a = statsa->trg.find(m.getPid())) +// != statsa->trg.end())) +// { +// pp.update(b->first,a->second,b->second); +// a->second.invalidate(); +// } +// else +// pp.update(b->first,m.approxOccurrenceCount(), +// b->second); +// } +// else pp.update(b->first,b->second); +// BOOST_FOREACH(sptr const& ff, m_active_ff_fix) +// (*ff)(btb,pp); +// BOOST_FOREACH(sptr const& ff, m_active_ff_common) +// (*ff)(btb,pp); +// tpcoll->Add(mkTPhrase(src,btb,pp)); +// } +// } +// if (!statsa) return statsb != NULL; +// for (a = statsa->trg.begin(); a != statsa->trg.end(); ++a) +// { +// uint32_t sid,off,len; +// if (!a->second.valid()) continue; +// parse_pid(a->first, sid, off, len); +// if (btb.T2) +// { +// Token const* x = bta.T2->sntStart(sid) + off; +// TSA::tree_iterator m(btb.I2.get(), x, len); +// if (m.size() == len) +// pp.update(a->first,m.approxOccurrenceCount(),a->second); +// else +// pp.update(a->first,a->second); +// } +// else pp.update(a->first,a->second); +// #if 0 +// // jstats const& j = a->second; +// cerr << bta.T1->pid2str(bta.V1.get(),pp.p1) << " ::: " +// << bta.T2->pid2str(bta.V2.get(),pp.p2) << endl; +// cerr << pp.raw1 << " " << pp.sample1 << " " << pp.good1 << " " +// << pp.joint << " " << pp.raw2 << endl; +// #endif + +// UTIL_THROW_IF2(pp.raw2 == 0, +// "OOPS" << bta.T1->pid2str(bta.V1.get(),pp.p1) << " ::: " +// << bta.T2->pid2str(bta.V2.get(),pp.p2) << ": " +// << pp.raw1 << " " << pp.sample1 << " " +// << pp.good1 << " " << pp.joint << " " +// << pp.raw2); +// 
BOOST_FOREACH(sptr const& ff, m_active_ff_fix) +// (*ff)(bta,pp); +// BOOST_FOREACH(sptr const& ff, m_active_ff_common) +// (*ff)(bta,pp); +// tpcoll->Add(mkTPhrase(src,bta,pp)); +// } +// return true; +// } - void - Mmsapt:: - ScorePPfix(bitext::PhrasePair& pp) const - { - BOOST_FOREACH(sptr const& ff, m_active_ff_fix) - (*ff)(btfix,pp); - BOOST_FOREACH(sptr const& ff, m_active_ff_common) - (*ff)(btfix,pp); - } - // process phrase stats from a single parallel corpus - bool - Mmsapt:: - pool_pstats(Phrase const& src, - uint64_t const pid1a, - pstats * statsa, - Bitext const & bta, - uint64_t const pid1b, - pstats const* statsb, - Bitext const & btb, - TargetPhraseCollection* tpcoll) const - { - PhrasePair pp; - if (statsa && statsb) - pp.init(pid1b, *statsa, *statsb, this->m_numScoreComponents); - else if (statsa) - pp.init(pid1a, *statsa, this->m_numScoreComponents); - else if (statsb) - pp.init(pid1b, *statsb, this->m_numScoreComponents); - else return false; // throw "no stats for pooling available!"; - - pstats::trg_map_t::const_iterator b; - pstats::trg_map_t::iterator a; - if (statsb) - { - for (b = statsb->trg.begin(); b != statsb->trg.end(); ++b) - { - uint32_t sid,off,len; - parse_pid(b->first, sid, off, len); - Token const* x = bta.T2->sntStart(sid) + off; - TSA::tree_iterator m(bta.I2.get(),x,x+len); - if (m.size() == len) - { - ; - if (statsa && ((a = statsa->trg.find(m.getPid())) - != statsa->trg.end())) - { - pp.update(b->first,a->second,b->second); - a->second.invalidate(); - } - else - pp.update(b->first,m.approxOccurrenceCount(), - b->second); - } - else pp.update(b->first,b->second); - BOOST_FOREACH(sptr const& ff, m_active_ff_fix) - (*ff)(btb,pp); - BOOST_FOREACH(sptr const& ff, m_active_ff_common) - (*ff)(btb,pp); - tpcoll->Add(createTargetPhrase(src,btb,pp)); - } - } - if (!statsa) return statsb != NULL; - for (a = statsa->trg.begin(); a != statsa->trg.end(); ++a) - { - uint32_t sid,off,len; - if (!a->second.valid()) continue; - 
parse_pid(a->first, sid, off, len); - if (btb.T2) - { - Token const* x = bta.T2->sntStart(sid) + off; - TSA::tree_iterator m(btb.I2.get(), x, x+len); - if (m.size() == len) - pp.update(a->first,m.approxOccurrenceCount(),a->second); - else - pp.update(a->first,a->second); - } - else - pp.update(a->first,a->second); -#if 0 - // jstats const& j = a->second; - cerr << bta.T1->pid2str(bta.V1.get(),pp.p1) << " ::: " - << bta.T2->pid2str(bta.V2.get(),pp.p2) << endl; - cerr << pp.raw1 << " " << pp.sample1 << " " << pp.good1 << " " - << pp.joint << " " << pp.raw2 << endl; -#endif - UTIL_THROW_IF2(pp.raw2 == 0, - "OOPS" << bta.T1->pid2str(bta.V1.get(),pp.p1) << " ::: " - << bta.T2->pid2str(bta.V2.get(),pp.p2) << ": " - << pp.raw1 << " " << pp.sample1 << " " - << pp.good1 << " " << pp.joint << " " - << pp.raw2); - BOOST_FOREACH(sptr const& ff, m_active_ff_fix) - (*ff)(bta,pp); - BOOST_FOREACH(sptr const& ff, m_active_ff_common) - (*ff)(bta,pp); - tpcoll->Add(createTargetPhrase(src,bta,pp)); - } - return true; - } - - // process phrase stats from a single parallel corpus - bool - Mmsapt:: - combine_pstats - (Phrase const& src, - uint64_t const pid1a, pstats * statsa, Bitext const & bta, - uint64_t const pid1b, pstats const* statsb, Bitext const & btb, - TargetPhraseCollection* tpcoll) const - { - PhrasePair ppfix,ppdyn,pool; - // ppfix: counts from btfix - // ppdyn: counts from btdyn - // pool: pooled counts from both - Word w; - if (statsa) ppfix.init(pid1a,*statsa,this->m_numScoreComponents); - if (statsb) ppdyn.init(pid1b,*statsb,this->m_numScoreComponents); - pstats::trg_map_t::const_iterator b; - pstats::trg_map_t::iterator a; - - if (statsb) - { - pool.init(pid1b,*statsb,0); - for (b = statsb->trg.begin(); b != statsb->trg.end(); ++b) - { - ppdyn.update(b->first,b->second); - BOOST_FOREACH(sptr const& ff, m_active_ff_dyn) - (*ff)(btb,ppdyn); + // // process phrase stats from a single parallel corpus + // bool + // Mmsapt:: + // combine_pstats + // (Phrase const& src, + 
// uint64_t const pid1a, pstats * statsa, Bitext const & bta, + // uint64_t const pid1b, pstats const* statsb, Bitext const & btb, + // TargetPhraseCollection* tpcoll) const + // { + // if (!statsa && !statsb) return false; + + // PhrasePair ppfix,ppdyn,pool; Word w; + // // ppfix: counts from btfix + // // ppdyn: counts from btdyn + // // pool: pooled counts from both + + // pstats::trg_map_t::const_iterator b; + // pstats::trg_map_t::iterator a; + + + // set check; + // if (statsb) + // { + // ppdyn.init(pid1b,*statsb,this->m_numScoreComponents); + // if (statsa) + // { + // pool.init(pid1b, *statsa, *statsb, 0); + // ppfix.init(pid1a,*statsa, 0); + // } + // else + // { + // pool.init(pid1b, *statsb,0); + // ppfix.init(); + // } + + // for (b = statsb->trg.begin(); b != statsb->trg.end(); ++b) + // { + // ppdyn.update(b->first,b->second); + // BOOST_FOREACH(sptr const& ff, m_active_ff_dyn) + // (*ff)(btb,ppdyn); - uint32_t sid,off,len; - parse_pid(b->first, sid, off, len); - Token const* x = bta.T2->sntStart(sid) + off; - TSA::tree_iterator m(bta.I2.get(),x,x+len); + // uint32_t sid,off,len; + // parse_pid(b->first, sid, off, len); + // Token const* x = btb.T2->sntStart(sid) + off; + // TSA::tree_iterator m(bta.I2.get(),x,len); - if (m.size() && statsa && - ((a = statsa->trg.find(m.getPid())) != statsa->trg.end())) - { - // phrase pair found also in btfix - ppfix.update(a->first,a->second); - BOOST_FOREACH(sptr const& ff, m_active_ff_fix) - (*ff)(bta,ppfix,&ppdyn.fvals); - BOOST_FOREACH(sptr const& ff, m_active_ff_common) - (*ff)(bta,ppfix,&ppdyn.fvals); - a->second.invalidate(); - } - else - { - // phrase pair was not found in btfix - - // ... but the source phrase was - if (m.size()) - pool.update(b->first,m.approxOccurrenceCount(), b->second); - - // ... 
and not even the source phrase - else - pool.update(b->first,b->second); - - BOOST_FOREACH(sptr const& ff, m_active_ff_fix) - (*ff)(btb,pool,&ppdyn.fvals); - BOOST_FOREACH(sptr const& ff, m_active_ff_common) - (*ff)(btb,pool,&ppdyn.fvals); - - } - - tpcoll->Add(createTargetPhrase(src,btb,ppdyn)); - } - } - - // now deal with all phraise pairs that are ONLY in btfix - // (the ones that are in both were dealt with above) - if (statsa) - { - pool.init(pid1a,*statsa,0); - for (a = statsa->trg.begin(); a != statsa->trg.end(); ++a) - { - if (!a->second.valid()) continue; // done above - ppfix.update(a->first,a->second); - BOOST_FOREACH(sptr const& ff, m_active_ff_fix) - (*ff)(bta,ppfix); - BOOST_FOREACH(sptr const& ff, m_active_ff_common) - (*ff)(bta,ppfix); + // Token const* y = m.getToken(0); + // for (size_t i = 0; i < len; ++i) + // cout << x[i].id() << " " << endl; + // for (size_t i = 0; i < m.size(); ++i) + // cout << y[i].id() << " " << endl; - if (btb.I2) - { - uint32_t sid,off,len; - parse_pid(a->first, sid, off, len); - Token const* x = bta.T2->sntStart(sid) + off; - TSA::tree_iterator m(btb.I2.get(),x,x+len); - if (m.size()) - pool.update(a->first,m.approxOccurrenceCount(),a->second); - else - pool.update(a->first,a->second); - } - else pool.update(a->first,a->second); - BOOST_FOREACH(sptr const& ff, m_active_ff_dyn) - (*ff)(btb,pool,&ppfix.fvals); - if (ppfix.p2) - tpcoll->Add(createTargetPhrase(src,bta,ppfix)); - } - } - return (statsa || statsb); - } + // if (statsa && m.size() && + // ((a = statsa->trg.find(m.getPid())) != statsa->trg.end())) + // { // i.e., phrase pair found also in btfix + // ppfix.update(a->first,a->second); + // pool.update(b->first, b->second, a->second); + // BOOST_FOREACH(sptr const& ff, m_active_ff_fix) + // (*ff)(bta, ppfix, &ppdyn.fvals); + // check.insert(a->first); + // } + // else // phrase pair was not found in btfix + // { + // if (m.size()) // ... 
but the source phrase was + // { + // pool.update(b->first, m.approxOccurrenceCount(), b->second); + // ppfix.update(b->first,m.approxOccurrenceCount()); + // } + // else // ... and not even the source phrase + // { + // pool.update(b->first, b->second); + // ppfix.update(b->first,0); + // } + // BOOST_FOREACH(sptr const& ff, m_active_ff_fix) + // (*ff)(btb, ff->allowPooling() ? pool : ppfix, &ppdyn.fvals); + // } + // BOOST_FOREACH(sptr const& ff, m_active_ff_common) + // (*ff)(btb, pool, &ppdyn.fvals); + // tpcoll->Add(mkTPhrase(src,btb,ppdyn)); + // } + // } + + // // now deal with all phraise pairs that are ONLY in btfix + // // (the ones that are in both were dealt with above) + // if (statsa) + // { + // ppfix.init(pid1a, *statsa, this->m_numScoreComponents); + // pool.init(pid1a, *statsa, 0); + // ppdyn.init(); + // for (a = statsa->trg.begin(); a != statsa->trg.end(); ++a) + // { + // if (check.find(a->first) != check.end()) + // continue; + + // ppfix.update(a->first, a->second); + // BOOST_FOREACH(sptr const& ff, m_active_ff_fix) + // (*ff)(bta, ppfix); + + // if (btb.I2) + // { + // uint32_t sid,off,len; + // parse_pid(a->first, sid, off, len); + // Token const* x = bta.T2->sntStart(sid) + off; + // TSA::tree_iterator m(btb.I2.get(), x, len); + // if (m.size()) + // pool.update(a->first, m.approxOccurrenceCount(), a->second); + // else + // pool.update(a->first, a->second); + // } + // else pool.update(a->first, a->second); + // BOOST_FOREACH(sptr const& ff, m_active_ff_dyn) + // (*ff)(btb, ff->allowPooling() ? 
pool : ppdyn, &ppfix.fvals); + // BOOST_FOREACH(sptr const& ff, m_active_ff_common) + // (*ff)(bta, pool, &ppfix.fvals); + // if (ppfix.p2) + // tpcoll->Add(mkTPhrase(src, bta, ppfix)); + // } + // } + // return true; + // } Mmsapt:: TargetPhraseCollectionWrapper:: @@ -595,8 +754,34 @@ namespace Moses { assert(this->refCount == 0); } - + template + void + expand(typename Bitext::iter const& m, + Bitext const& bt, + pstats const& ps, vector >& dest) + { + dest.reserve(ps.trg.size()); + PhrasePair pp; + pp.init(m.getToken(0), m.size(), &ps, 0); + // cout << HERE << " " << toString(*(bt.V1),pp.start1,pp.len1) << endl; + pstats::trg_map_t::const_iterator a; + for (a = ps.trg.begin(); a != ps.trg.end(); ++a) + { + uint32_t sid,off,len; + parse_pid(a->first, sid, off, len); + pp.update(bt.T2->sntStart(sid)+off, len, a->second); + dest.push_back(pp); + } + typename PhrasePair::SortByTargetIdSeq sorter; + sort(dest.begin(), dest.end(),sorter); +#if 0 + BOOST_FOREACH(PhrasePair const& p, dest) + cout << toString (*bt.V1,p.start1,p.len1) << " ::: " + << toString (*bt.V2,p.start2,p.len2) << " " + << p.joint << endl; +#endif + } // This is not the most efficient way of phrase lookup! TargetPhraseCollection const* @@ -605,13 +790,9 @@ namespace Moses { // map from Moses Phrase to internal id sequence vector sphrase; - fillIdSeq(src,input_factor,*btfix.V1,sphrase); + fillIdSeq(src,input_factor,*(btfix.V1),sphrase); if (sphrase.size() == 0) return NULL; - // lookup in static bitext - TSA::tree_iterator mfix(btfix.I1.get(),&sphrase[0],sphrase.size()); - - // lookup in dynamic bitext // Reserve a local copy of the dynamic bitext in its current form. /btdyn/ // is set to a new copy of the dynamic bitext every time a sentence pair // is added. /dyn/ keeps the old bitext around as long as we need it. 
@@ -621,12 +802,13 @@ namespace Moses dyn = btdyn; } assert(dyn); + + // lookup phrases in both bitexts + TSA::tree_iterator mfix(btfix.I1.get(), &sphrase[0], sphrase.size()); TSA::tree_iterator mdyn(dyn->I1.get()); if (dyn->I1.get()) - { - for (size_t i = 0; mdyn.size() == i && i < sphrase.size(); ++i) - mdyn.extend(sphrase[i]); - } + for (size_t i = 0; mdyn.size() == i && i < sphrase.size(); ++i) + mdyn.extend(sphrase[i]); #if 0 cerr << src << endl; @@ -634,43 +816,62 @@ namespace Moses << mdyn.size() << " " << mdyn.getPid() << endl; #endif - // phrase not found in either - if (mdyn.size() != sphrase.size() && - mfix.size() != sphrase.size()) - return NULL; // not found + if (mdyn.size() != sphrase.size() && mfix.size() != sphrase.size()) + return NULL; // phrase not found in either bitext // cache lookup: - - uint64_t phrasekey; - if (mfix.size() == sphrase.size()) - phrasekey = (mfix.getPid()<<1); - else - phrasekey = (mdyn.getPid()<<1)+1; - + uint64_t phrasekey = (mfix.size() == sphrase.size() ? (mfix.getPid()<<1) + : (mdyn.getPid()<<1)+1); size_t revision = dyn->revision(); { boost::lock_guard guard(this->lock); tpc_cache_t::iterator c = m_cache.find(phrasekey); + // TO DO: we should revise the revision mechanism: we take the length + // of the dynamic bitext (in sentences) at the time the PT entry + // was stored as the time stamp. For each word in the + // vocabulary, we also store its most recent occurrence in the + // bitext. Only if the timestamp of each word in the phrase is + // newer than the timestamp of the phrase itself we must update + // the entry. 
if (c != m_cache.end() && c->second->revision == revision) return encache(c->second); } - // not found or not up to date + // OK: pt entry not found or not up to date + // lookup and expansion could be done in parallel threds, + // but ppdyn is probably small anyway + // TO DO: have Bitexts return lists of PhrasePairs instead of pstats + // no need to expand pstats at every single lookup again, especially + // for btfix. sptr sfix,sdyn; - if (mfix.size() == sphrase.size()) - sfix = btfix.lookup(mfix); - if (mdyn.size() == sphrase.size()) - sdyn = dyn->lookup(mdyn); + if (mfix.size() == sphrase.size()) sfix = btfix.lookup(mfix); + if (mdyn.size() == sphrase.size()) sdyn = dyn->lookup(mdyn); + + vector > ppfix,ppdyn; + if (sfix) expand(mfix, btfix, *sfix, ppfix); + if (sdyn) expand(mdyn, *dyn, *sdyn, ppdyn); - TargetPhraseCollectionWrapper* - ret = new TargetPhraseCollectionWrapper(revision,phrasekey); - if ((poolCounts && - pool_pstats(src, mfix.getPid(),sfix.get(),btfix, - mdyn.getPid(),sdyn.get(),*dyn,ret)) - || combine_pstats(src, mfix.getPid(),sfix.get(),btfix, - mdyn.getPid(),sdyn.get(),*dyn,ret)) + // now we have two lists of Phrase Pairs, let's merge them + TargetPhraseCollectionWrapper* ret; + ret = new TargetPhraseCollectionWrapper(revision,phrasekey); + PhrasePair::SortByTargetIdSeq sorter; + size_t i = 0; size_t k = 0; + while (i < ppfix.size() && k < ppdyn.size()) + { + int cmp = sorter.cmp(ppfix[i], ppdyn[k]); + if (cmp < 0) ret->Add(mkTPhrase(src,&ppfix[i++],NULL,dyn)); + else if (cmp == 0) ret->Add(mkTPhrase(src,&ppfix[i++],&ppdyn[k++],dyn)); + else ret->Add(mkTPhrase(src,NULL,&ppdyn[k++],dyn)); + } + while (i < ppfix.size()) ret->Add(mkTPhrase(src,&ppfix[i++],NULL,dyn)); + while (k < ppdyn.size()) ret->Add(mkTPhrase(src,NULL,&ppdyn[k++],dyn)); + if (m_tableLimit) ret->Prune(true, m_tableLimit); + else ret->Prune(true,ret->GetSize()); +#if 0 + if (combine_pstats(src, + mfix.getPid(), sfix.get(), btfix, + mdyn.getPid(), sdyn.get(), *dyn, ret)) { - if 
(m_tableLimit) ret->Prune(true,m_tableLimit); #if 0 sort(ret->begin(), ret->end(), CompareTargetPhrase()); cout << "SOURCE PHRASE: " << src << endl; @@ -686,6 +887,9 @@ namespace Moses } #endif } +#endif + + // put the result in the cache and return boost::lock_guard guard(this->lock); m_cache[phrasekey] = ret; return encache(ret); @@ -839,6 +1043,7 @@ namespace Moses TSA::tree_iterator mfix(btfix.I1.get(),&myphrase[0],myphrase.size()); if (mfix.size() == myphrase.size()) { + btfix.prep(mfix); // cerr << phrase << " " << mfix.approxOccurrenceCount() << endl; return true; } @@ -854,6 +1059,7 @@ namespace Moses { for (size_t i = 0; mdyn.size() == i && i < myphrase.size(); ++i) mdyn.extend(myphrase[i]); + if (mdyn.size() == myphrase.size()) dyn->prep(mdyn); } return mdyn.size() == myphrase.size(); } diff --git a/moses/TranslationModel/UG/mmsapt.h b/moses/TranslationModel/UG/mmsapt.h index b6be361313..a7ece8fdb2 100644 --- a/moses/TranslationModel/UG/mmsapt.h +++ b/moses/TranslationModel/UG/mmsapt.h @@ -19,6 +19,7 @@ #include "moses/TranslationModel/UG/mm/ug_typedefs.h" #include "moses/TranslationModel/UG/mm/tpt_pickler.h" #include "moses/TranslationModel/UG/mm/ug_bitext.h" +#include "moses/TranslationModel/UG/mm/ug_phrasepair.h" #include "moses/TranslationModel/UG/mm/ug_lexical_phrase_scorer2.h" #include "moses/InputFileStream.h" @@ -29,7 +30,8 @@ #include #include "moses/TranslationModel/PhraseDictionary.h" -#include "mmsapt_phrase_scorers.h" +#include "mmsapt_phrase_scorers.h" // deprecated +#include "sapt_phrase_scorers.h" // TO DO: // - make lexical phrase scorer take addition to the "dynamic overlay" into account @@ -47,47 +49,68 @@ namespace Moses #endif { friend class Alignment; + map param; public: typedef L2R_Token Token; typedef mmBitext mmbitext; typedef imBitext imbitext; + typedef Bitext bitext; typedef TSA tsa; typedef PhraseScorer pscorer; + private: + // vector > shards; mmbitext btfix; - sptr btdyn; + sptr btdyn; string bname,extra_data; string L1; 
string L2; - float m_lbop_parameter; - float m_lex_alpha; + float m_lbop_conf; // confidence level for lbop smoothing + float m_lex_alpha; // alpha paramter (j+a)/(m+a) for lexical smoothing // alpha parameter for lexical smoothing (joint+alpha)/(marg + alpha) // must be > 0 if dynamic size_t m_default_sample_size; size_t m_workers; // number of worker threads for sampling the bitexts - // deprecated! - char m_pfwd_denom; // denominator for computation of fwd phrase score: - // 'r' - divide by raw count - // 's' - divide by sample count - // 'g' - devide by number of "good" (i.e. coherent) samples - // size_t num_features; + // // deprecated! + // char m_pfwd_denom; // denominator for computation of fwd phrase score: + // // 'r' - divide by raw count + // // 's' - divide by sample count + // // 'g' - devide by number of "good" (i.e. coherent) samples + // // size_t num_features; size_t input_factor; size_t output_factor; // we can actually return entire Tokens! - bool withLogCountFeatures; // add logs of counts as features? - bool withCoherence; - string m_pfwd_features; // which pfwd functions to use - string m_pbwd_features; // which pbwd functions to use + // bool withLogCountFeatures; // add logs of counts as features? 
+ // bool withCoherence; + // string m_pfwd_features; // which pfwd functions to use + // string m_pbwd_features; // which pbwd functions to use + + // for display for human inspection (ttable dumps): vector m_feature_names; // names of features activated + vector m_is_logval; // keeps track of which features are log valued + vector m_is_integer; // keeps track of which features are integer valued + vector > m_active_ff_fix; // activated feature functions (fix) vector > m_active_ff_dyn; // activated feature functions (dyn) vector > m_active_ff_common; // activated feature functions (dyn) - size_t - add_corpus_specific_features - (vector >& ffvec, size_t num_feats); + void + register_ff(sptr const& ff, vector > & registry); + + template + void + check_ff(string const ffname,vector >* registry = NULL); + // add feature function if specified + + template + void + check_ff(string const ffname, float const xtra, vector >* registry = NULL); + // add feature function if specified + + void + add_corpus_specific_features(vector >& ffvec); // built-in feature functions // PScorePfwd calc_pfwd_fix, calc_pfwd_dyn; @@ -140,12 +163,24 @@ namespace Moses mm2dtable_t COOCraw; TargetPhrase* - createTargetPhrase + mkTPhrase(Phrase const& src, + Moses::bitext::PhrasePair* fix, + Moses::bitext::PhrasePair* dyn, + sptr > const& dynbt) const; + + // template + // void + // expand(typename Bitext::iter const& m, Bitext const& bt, + // pstats const& pstats, vector >& dest); + +#if 0 + TargetPhrase* + mkTPhrase (Phrase const& src, Bitext const& bt, - bitext::PhrasePair const& pp + Moses::bitext::PhrasePair const& pp ) const; - +#endif void process_pstats (Phrase const& src, @@ -180,7 +215,7 @@ namespace Moses ) const; void - load_extra_data(string bname); + load_extra_data(string bname, bool locking); mutable size_t m_tpc_ctr; public: @@ -231,8 +266,14 @@ namespace Moses vector const& GetFeatureNames() const; - void - ScorePPfix(bitext::PhrasePair& pp) const; + // void + // 
ScorePPfix(bitext::PhrasePair& pp) const; + + bool + isLogVal(int i) const; + + bool + isInteger(int i) const; private: }; diff --git a/moses/TranslationModel/UG/mmsapt_align.cc b/moses/TranslationModel/UG/mmsapt_align.cc index 407df648d8..8b6bf1eb2f 100644 --- a/moses/TranslationModel/UG/mmsapt_align.cc +++ b/moses/TranslationModel/UG/mmsapt_align.cc @@ -1,335 +1,336 @@ #include "mmsapt.h" +// currently broken -namespace Moses -{ - using namespace bitext; - using namespace std; - using namespace boost; +// namespace Moses +// { +// using namespace bitext; +// using namespace std; +// using namespace boost; - struct PPgreater - { - bool operator()(PhrasePair const& a, PhrasePair const& b) - { - return a.score > b.score; - } - }; +// struct PPgreater +// { +// bool operator()(PhrasePair const& a, PhrasePair const& b) +// { +// return a.score > b.score; +// } +// }; - void - Mmsapt:: - setWeights(vector const & w) - { - assert(w.size() == this->m_numScoreComponents); - this->feature_weights = w; - } +// void +// Mmsapt:: +// setWeights(vector const & w) +// { +// assert(w.size() == this->m_numScoreComponents); +// this->feature_weights = w; +// } - struct PhraseAlnHyp - { - PhrasePair pp; - ushort s1,e1,s2,e2; // start and end positions - int prev; // preceding alignment hypothesis - float score; - bitvector scov; // source coverage - PhraseAlnHyp(PhrasePair const& ppx, int slen, - pair const& sspan, - pair const& tspan) - : pp(ppx), prev(-1), score(ppx.score), scov(slen) - { - s1 = sspan.first; e1 = sspan.second; - s2 = tspan.first; e2 = tspan.second; - for (size_t i = s1; i < e1; ++i) - scov.set(i); - } +// struct PhraseAlnHyp +// { +// PhrasePair pp; +// ushort s1,e1,s2,e2; // start and end positions +// int prev; // preceding alignment hypothesis +// float score; +// bitvector scov; // source coverage +// PhraseAlnHyp(PhrasePair const& ppx, int slen, +// pair const& sspan, +// pair const& tspan) +// : pp(ppx), prev(-1), score(ppx.score), scov(slen) +// { +// s1 = 
sspan.first; e1 = sspan.second; +// s2 = tspan.first; e2 = tspan.second; +// for (size_t i = s1; i < e1; ++i) +// scov.set(i); +// } - bool operator<(PhraseAlnHyp const& other) const - { - return this->score < other.score; - } +// bool operator<(PhraseAlnHyp const& other) const +// { +// return this->score < other.score; +// } - bool operator>(PhraseAlnHyp const& other) const - { - return this->score > other.score; - } +// bool operator>(PhraseAlnHyp const& other) const +// { +// return this->score > other.score; +// } - PhraseOrientation - po_bwd(PhraseAlnHyp const* prev) const - { - if (s2 == 0) return po_first; - assert(prev); - assert(prev->e2 <= s2); - if (prev->e2 < s2) return po_other; - if (prev->e1 == s1) return po_mono; - if (prev->e1 < s1) return po_jfwd; - if (prev->s1 == e1) return po_swap; - if (prev->s1 > e1) return po_jbwd; - return po_other; - } +// PhraseOrientation +// po_bwd(PhraseAlnHyp const* prev) const +// { +// if (s2 == 0) return po_first; +// assert(prev); +// assert(prev->e2 <= s2); +// if (prev->e2 < s2) return po_other; +// if (prev->e1 == s1) return po_mono; +// if (prev->e1 < s1) return po_jfwd; +// if (prev->s1 == e1) return po_swap; +// if (prev->s1 > e1) return po_jbwd; +// return po_other; +// } - PhraseOrientation - po_fwd(PhraseAlnHyp const* next) const - { - if (!next) return po_last; - assert(next->s2 >= e2); - if (next->s2 < e2) return po_other; - if (next->e1 == s1) return po_swap; - if (next->e1 < s1) return po_jbwd; - if (next->s1 == e1) return po_mono; - if (next->s1 > e1) return po_jfwd; - return po_other; - } +// PhraseOrientation +// po_fwd(PhraseAlnHyp const* next) const +// { +// if (!next) return po_last; +// assert(next->s2 >= e2); +// if (next->s2 < e2) return po_other; +// if (next->e1 == s1) return po_swap; +// if (next->e1 < s1) return po_jbwd; +// if (next->s1 == e1) return po_mono; +// if (next->s1 > e1) return po_jfwd; +// return po_other; +// } - float - dprob_fwd(PhraseAlnHyp const& next) - { - return 
pp.dfwd[po_fwd(&next)]; - } +// float +// dprob_fwd(PhraseAlnHyp const& next) +// { +// return pp.dfwd[po_fwd(&next)]; +// } - float - dprob_bwd(PhraseAlnHyp const& prev) - { - return pp.dbwd[po_bwd(&prev)]; - } +// float +// dprob_bwd(PhraseAlnHyp const& prev) +// { +// return pp.dbwd[po_bwd(&prev)]; +// } - }; +// }; - class Alignment - { - typedef L2R_Token Token; - typedef TSA tsa; - typedef pair span; - typedef vector > pidmap_t; // span -> phrase ID - typedef boost::unordered_map > pid2span_t; - typedef pstats::trg_map_t jStatsTable; +// class Alignment +// { +// typedef L2R_Token Token; +// typedef TSA tsa; +// typedef pair span; +// typedef vector > pidmap_t; // span -> phrase ID +// typedef boost::unordered_map > pid2span_t; +// typedef pstats::trg_map_t jStatsTable; - Mmsapt const& PT; - vector s,t; - pidmap_t sspan2pid, tspan2pid; // span -> phrase ID - pid2span_t spid2span,tpid2span; - vector > > spstats; +// Mmsapt const& PT; +// vector s,t; +// pidmap_t sspan2pid, tspan2pid; // span -> phrase ID +// pid2span_t spid2span,tpid2span; +// vector > > spstats; - vector PP; - // position-independent phrase pair info - public: - vector PAH; - vector > tpos2ahyp; - // maps from target start positions to PhraseAlnHyps starting at - // that position +// vector PP; +// // position-independent phrase pair info +// public: +// vector PAH; +// vector > tpos2ahyp; +// // maps from target start positions to PhraseAlnHyps starting at +// // that position - sptr getPstats(span const& sspan); - void fill_tspan_maps(); - void fill_sspan_maps(); - public: - Alignment(Mmsapt const& pt, string const& src, string const& trg); - void show(ostream& out); - void show(ostream& out, PhraseAlnHyp const& ah); - }; +// sptr getPstats(span const& sspan); +// void fill_tspan_maps(); +// void fill_sspan_maps(); +// public: +// Alignment(Mmsapt const& pt, string const& src, string const& trg); +// void show(ostream& out); +// void show(ostream& out, PhraseAlnHyp const& ah); +// }; - void 
- Alignment:: - show(ostream& out, PhraseAlnHyp const& ah) - { -#if 0 - LexicalPhraseScorer2::table_t const& - COOCjnt = PT.calc_lex.scorer.COOC; +// void +// Alignment:: +// show(ostream& out, PhraseAlnHyp const& ah) +// { +// #if 0 +// LexicalPhraseScorer2::table_t const& +// COOCjnt = PT.calc_lex.scorer.COOC; - out << setw(10) << exp(ah.score) << " " - << PT.btfix.T2->pid2str(PT.btfix.V2.get(), ah.pp.p2) - << " <=> " - << PT.btfix.T1->pid2str(PT.btfix.V1.get(), ah.pp.p1); - vector const& a = ah.pp.aln; - // BOOST_FOREACH(int x,a) cout << "[" << x << "] "; - for (size_t u = 0; u+1 < a.size(); u += 2) - out << " " << int(a[u+1]) << "-" << int(a[u]); +// out << setw(10) << exp(ah.score) << " " +// << PT.btfix.T2->pid2str(PT.btfix.V2.get(), ah.pp.p2) +// << " <=> " +// << PT.btfix.T1->pid2str(PT.btfix.V1.get(), ah.pp.p1); +// vector const& a = ah.pp.aln; +// // BOOST_FOREACH(int x,a) cout << "[" << x << "] "; +// for (size_t u = 0; u+1 < a.size(); u += 2) +// out << " " << int(a[u+1]) << "-" << int(a[u]); - if (ah.e2-ah.s2 == 1 and ah.e1-ah.s1 == 1) - out << " " << COOCjnt[s[ah.s1]][t[ah.s2]] - << "/" << PT.COOCraw[s[ah.s1]][t[ah.s2]] - << "=" << float(COOCjnt[s[ah.s1]][t[ah.s2]])/PT.COOCraw[s[ah.s1]][t[ah.s2]]; - out << endl; - // float const* ofwdj = ah.pp.dfwd; - // float const* obwdj = ah.pp.dbwd; - // uint32_t const* ofwdm = spstats[ah.s1][ah.e1-ah.s1-1]->ofwd; - // uint32_t const* obwdm = spstats[ah.s1][ah.e1-ah.s1-1]->obwd; - // out << " [first: " << ofwdj[po_first]<<"/"< foo(PAH); - sort(tpos2ahyp[s2].begin(), tpos2ahyp[s2].end(), foo); - for (size_t h = 0; h < tpos2ahyp[s2].size(); ++h) - show(out,PAH[tpos2ahyp[s2][h]]); - } - } +// void +// Alignment:: +// show(ostream& out) +// { +// // show what we have so far ... 
+// for (size_t s2 = 0; s2 < t.size(); ++s2) +// { +// VectorIndexSorter foo(PAH); +// sort(tpos2ahyp[s2].begin(), tpos2ahyp[s2].end(), foo); +// for (size_t h = 0; h < tpos2ahyp[s2].size(); ++h) +// show(out,PAH[tpos2ahyp[s2][h]]); +// } +// } - sptr - Alignment:: - getPstats(span const& sspan) - { - size_t k = sspan.second - sspan.first - 1; - if (k < spstats[sspan.first].size()) - return spstats[sspan.first][k]; - else return sptr(); - } +// sptr +// Alignment:: +// getPstats(span const& sspan) +// { +// size_t k = sspan.second - sspan.first - 1; +// if (k < spstats[sspan.first].size()) +// return spstats[sspan.first][k]; +// else return sptr(); +// } - void - Alignment:: - fill_tspan_maps() - { - tspan2pid.assign(t.size(),vector(t.size(),0)); - for (size_t i = 0; i < t.size(); ++i) - { - tsa::tree_iterator m(PT.btfix.I2.get()); - for (size_t k = i; k < t.size() && m.extend(t[k]); ++k) - { - uint64_t pid = m.getPid(); - tpid2span[pid].push_back(pair(i,k+1)); - tspan2pid[i][k] = pid; - } - } - } +// void +// Alignment:: +// fill_tspan_maps() +// { +// tspan2pid.assign(t.size(),vector(t.size(),0)); +// for (size_t i = 0; i < t.size(); ++i) +// { +// tsa::tree_iterator m(PT.btfix.I2.get()); +// for (size_t k = i; k < t.size() && m.extend(t[k]); ++k) +// { +// uint64_t pid = m.getPid(); +// tpid2span[pid].push_back(pair(i,k+1)); +// tspan2pid[i][k] = pid; +// } +// } +// } - void - Alignment:: - fill_sspan_maps() - { - sspan2pid.assign(s.size(),vector(s.size(),0)); - spstats.resize(s.size()); - for (size_t i = 0; i < s.size(); ++i) - { - tsa::tree_iterator m(PT.btfix.I1.get()); - for (size_t k = i; k < s.size() && m.extend(s[k]); ++k) - { - uint64_t pid = m.getPid(); - sspan2pid[i][k] = pid; - pid2span_t::iterator p = spid2span.find(pid); - if (p != spid2span.end()) - { - int x = p->second[0].first; - int y = p->second[0].second-1; - spstats[i].push_back(spstats[x][y-x]); - } - else - { - spstats[i].push_back(PT.btfix.lookup(m)); - cout << 
PT.btfix.T1->pid2str(PT.btfix.V1.get(),pid) << " " - << spstats[i].back()->good << "/" << spstats[i].back()->sample_cnt - << endl; - } - spid2span[pid].push_back(pair(i,k+1)); - } - } - } +// void +// Alignment:: +// fill_sspan_maps() +// { +// sspan2pid.assign(s.size(),vector(s.size(),0)); +// spstats.resize(s.size()); +// for (size_t i = 0; i < s.size(); ++i) +// { +// tsa::tree_iterator m(PT.btfix.I1.get()); +// for (size_t k = i; k < s.size() && m.extend(s[k]); ++k) +// { +// uint64_t pid = m.getPid(); +// sspan2pid[i][k] = pid; +// pid2span_t::iterator p = spid2span.find(pid); +// if (p != spid2span.end()) +// { +// int x = p->second[0].first; +// int y = p->second[0].second-1; +// spstats[i].push_back(spstats[x][y-x]); +// } +// else +// { +// spstats[i].push_back(PT.btfix.lookup(m)); +// cout << PT.btfix.T1->pid2str(PT.btfix.V1.get(),pid) << " " +// << spstats[i].back()->good << "/" << spstats[i].back()->sample_cnt +// << endl; +// } +// spid2span[pid].push_back(pair(i,k+1)); +// } +// } +// } - Alignment:: - Alignment(Mmsapt const& pt, string const& src, string const& trg) - : PT(pt) - { - PT.btfix.V1->fillIdSeq(src,s); - PT.btfix.V2->fillIdSeq(trg,t); +// Alignment:: +// Alignment(Mmsapt const& pt, string const& src, string const& trg) +// : PT(pt) +// { +// PT.btfix.V1->fillIdSeq(src,s); +// PT.btfix.V2->fillIdSeq(trg,t); - // LexicalPhraseScorer2::table_t const& COOC = PT.calc_lex.scorer.COOC; - // BOOST_FOREACH(id_type i, t) - // { - // cout << (*PT.btfix.V2)[i]; - // if (i < PT.wlex21.size()) - // { - // BOOST_FOREACH(id_type k, PT.wlex21[i]) - // { - // size_t j = COOC[k][i]; - // size_t m1 = COOC.m1(k); - // size_t m2 = COOC.m2(i); - // if (j*1000 > m1 && j*1000 > m2) - // cout << " " << (*PT.btfix.V1)[k]; - // } - // } - // cout << endl; - // } +// // LexicalPhraseScorer2::table_t const& COOC = PT.calc_lex.scorer.COOC; +// // BOOST_FOREACH(id_type i, t) +// // { +// // cout << (*PT.btfix.V2)[i]; +// // if (i < PT.wlex21.size()) +// // { +// // 
BOOST_FOREACH(id_type k, PT.wlex21[i]) +// // { +// // size_t j = COOC[k][i]; +// // size_t m1 = COOC.m1(k); +// // size_t m2 = COOC.m2(i); +// // if (j*1000 > m1 && j*1000 > m2) +// // cout << " " << (*PT.btfix.V1)[k]; +// // } +// // } +// // cout << endl; +// // } - fill_tspan_maps(); - fill_sspan_maps(); - tpos2ahyp.resize(t.size()); - // now fill the association score table - PAH.reserve(1000000); - typedef pid2span_t::iterator psiter; - for (psiter L = spid2span.begin(); L != spid2span.end(); ++L) - { - if (!L->second.size()) continue; // should never happen anyway - int i = L->second[0].first; - int k = L->second[0].second - i -1; - sptr ps = spstats[i][k]; - PhrasePair pp; pp.init(L->first,*ps, PT.m_numScoreComponents); - jStatsTable & J = ps->trg; - for (jStatsTable::iterator y = J.begin(); y != J.end(); ++y) - { - psiter R = tpid2span.find(y->first); - if (R == tpid2span.end()) continue; - pp.update(y->first, y->second); - PT.ScorePPfix(pp); - pp.eval(PT.feature_weights); - PP.push_back(pp); - BOOST_FOREACH(span const& sspan, L->second) - { - BOOST_FOREACH(span const& tspan, R->second) - { - tpos2ahyp[tspan.first].push_back(PAH.size()); - PAH.push_back(PhraseAlnHyp(PP.back(),s.size(),sspan,tspan)); - } - } - } - } - } +// fill_tspan_maps(); +// fill_sspan_maps(); +// tpos2ahyp.resize(t.size()); +// // now fill the association score table +// PAH.reserve(1000000); +// typedef pid2span_t::iterator psiter; +// for (psiter L = spid2span.begin(); L != spid2span.end(); ++L) +// { +// if (!L->second.size()) continue; // should never happen anyway +// int i = L->second[0].first; +// int k = L->second[0].second - i -1; +// sptr ps = spstats[i][k]; +// PhrasePair pp; pp.init(L->first,*ps, PT.m_numScoreComponents); +// jStatsTable & J = ps->trg; +// for (jStatsTable::iterator y = J.begin(); y != J.end(); ++y) +// { +// psiter R = tpid2span.find(y->first); +// if (R == tpid2span.end()) continue; +// pp.update(y->first, y->second); +// PT.ScorePPfix(pp); +// 
pp.eval(PT.feature_weights); +// PP.push_back(pp); +// BOOST_FOREACH(span const& sspan, L->second) +// { +// BOOST_FOREACH(span const& tspan, R->second) +// { +// tpos2ahyp[tspan.first].push_back(PAH.size()); +// PAH.push_back(PhraseAlnHyp(PP.back(),s.size(),sspan,tspan)); +// } +// } +// } +// } +// } - int - extend(vector & PAH, int edge, int next) - { - if ((PAH[edge].scov & PAH[next].scov).count()) - return -1; - int ret = PAH.size(); - PAH.push_back(PAH[next]); - PhraseAlnHyp & h = PAH.back(); - h.prev = edge; - h.scov |= PAH[edge].scov; - h.score += log(PAH[edge].dprob_fwd(PAH[next])); - h.score += log(PAH[next].dprob_bwd(PAH[edge])); - return ret; - } +// int +// extend(vector & PAH, int edge, int next) +// { +// if ((PAH[edge].scov & PAH[next].scov).count()) +// return -1; +// int ret = PAH.size(); +// PAH.push_back(PAH[next]); +// PhraseAlnHyp & h = PAH.back(); +// h.prev = edge; +// h.scov |= PAH[edge].scov; +// h.score += log(PAH[edge].dprob_fwd(PAH[next])); +// h.score += log(PAH[next].dprob_bwd(PAH[edge])); +// return ret; +// } - sptr > - Mmsapt:: - align(string const& src, string const& trg) const - { - // For the time being, we consult only the fixed bitext. - // We might also consider the dynamic bitext. => TO DO. - Alignment A(*this,src,trg); - VectorIndexSorter foo(A.PAH); - vector o; foo.GetOrder(o); - BOOST_FOREACH(int i, o) A.show(cout,A.PAH[i]); - sptr > aln; - return aln; -} -} +// sptr > +// Mmsapt:: +// align(string const& src, string const& trg) const +// { +// // For the time being, we consult only the fixed bitext. +// // We might also consider the dynamic bitext. => TO DO. 
+// Alignment A(*this,src,trg); +// VectorIndexSorter foo(A.PAH); +// vector o; foo.GetOrder(o); +// BOOST_FOREACH(int i, o) A.show(cout,A.PAH[i]); +// sptr > aln; +// return aln; +// } +// } diff --git a/moses/TranslationModel/UG/mmsapt_phrase_scorers.h b/moses/TranslationModel/UG/mmsapt_phrase_scorers.h index 6e852b44b5..083afb3a32 100644 --- a/moses/TranslationModel/UG/mmsapt_phrase_scorers.h +++ b/moses/TranslationModel/UG/mmsapt_phrase_scorers.h @@ -1,268 +1,17 @@ // -*- c++ -*- +// written by Ulrich Germann #pragma once #include "moses/TranslationModel/UG/mm/ug_bitext.h" #include "util/exception.hh" +#include "boost/format.hpp" +#include "sapt_pscore_base.h" + +// DEPRECATED CODE: Word and phrase penalties are now +// added by the decoder. namespace Moses { namespace bitext { - - template - class - PhraseScorer - { - protected: - int m_index; - int m_num_feats; - vector m_feature_names; - public: - - virtual - void - operator()(Bitext const& pt, PhrasePair& pp, vector * dest=NULL) - const = 0; - - int - fcnt() const - { return m_num_feats; } - - vector const & - fnames() const - { return m_feature_names; } - - string const & - fname(int i) const - { - UTIL_THROW_IF2((i < m_index || i >= m_index + m_num_feats), - "Feature name index out of range at " - << __FILE__ << ":" << __LINE__); - return m_feature_names.at(i - m_index); - } - - int - getIndex() const - { return m_index; } - }; - - //////////////////////////////////////////////////////////////////////////////// - - template - class - PScorePfwd : public PhraseScorer - { - float conf; - char denom; - public: - PScorePfwd() - { - this->m_num_feats = 1; - } - - int - init(int const i, float const c, char d) - { - conf = c; - denom = d; - this->m_index = i; - ostringstream buf; - buf << format("pfwd-%c%.3f") % denom % c; - this->m_feature_names.push_back(buf.str()); - return i + this->m_num_feats; - } - - void - operator()(Bitext const& bt, PhrasePair & pp, - vector * dest = NULL) const - { - if (!dest) dest 
= &pp.fvals; - if (pp.joint > pp.good1) - { - cerr<m_index] = log(lbop(pp.good1, pp.joint, conf)); - break; - case 's': - (*dest)[this->m_index] = log(lbop(pp.sample1, pp.joint, conf)); - break; - case 'r': - (*dest)[this->m_index] = log(lbop(pp.raw1, pp.joint, conf)); - } - } - }; - - //////////////////////////////////////////////////////////////////////////////// - - template - class - PScorePbwd : public PhraseScorer - { - float conf; - char denom; - public: - PScorePbwd() - { - this->m_num_feats = 1; - } - - int - init(int const i, float const c, char d) - { - conf = c; - denom = d; - this->m_index = i; - ostringstream buf; - buf << format("pbwd-%c%.3f") % denom % c; - this->m_feature_names.push_back(buf.str()); - return i + this->m_num_feats; - } - - void - operator()(Bitext const& bt, PhrasePair& pp, - vector * dest = NULL) const - { - if (!dest) dest = &pp.fvals; - // we use the denominator specification to scale the raw counts on the - // target side; the clean way would be to counter-sample - uint32_t r2 = pp.raw2; - if (denom == 'g') r2 = round(r2 * float(pp.good1) / pp.raw1); - else if (denom == 's') r2 = round(r2 * float(pp.sample1) / pp.raw1); - (*dest)[this->m_index] = log(lbop(max(r2, pp.joint),pp.joint,conf)); - } - }; - - //////////////////////////////////////////////////////////////////////////////// - - template - class - PScoreCoherence : public PhraseScorer - { - public: - PScoreCoherence() - { - this->m_num_feats = 1; - } - - int - init(int const i) - { - this->m_index = i; - this->m_feature_names.push_back(string("coherence")); - return i + this->m_num_feats; - } - - void - operator()(Bitext const& bt, PhrasePair& pp, - vector * dest = NULL) const - { - if (!dest) dest = &pp.fvals; - (*dest)[this->m_index] = log(pp.good1) - log(pp.sample1); - } - }; - - //////////////////////////////////////////////////////////////////////////////// - - template - class - PScoreLogCounts : public PhraseScorer - { - float conf; - public: - PScoreLogCounts() - 
{ - this->m_num_feats = 5; - } - - int - init(int const i) - { - this->m_index = i; - this->m_feature_names.push_back("log-r1"); - this->m_feature_names.push_back("log-s1"); - this->m_feature_names.push_back("log-g1"); - this->m_feature_names.push_back("log-j"); - this->m_feature_names.push_back("log-r2"); - return i + this->m_num_feats; - } - - void - operator()(Bitext const& bt, PhrasePair& pp, - vector * dest = NULL) const - { - if (!dest) dest = &pp.fvals; - size_t i = this->m_index; - assert(pp.raw1); - assert(pp.sample1); - assert(pp.good1); - assert(pp.joint); - assert(pp.raw2); - (*dest)[i] = -log(pp.raw1); - (*dest)[++i] = -log(pp.sample1); - (*dest)[++i] = -log(pp.good1); - (*dest)[++i] = +log(pp.joint); - (*dest)[++i] = -log(pp.raw2); - } - }; - - template - class - PScoreLex : public PhraseScorer - { - float const m_alpha; - public: - LexicalPhraseScorer2 scorer; - - PScoreLex(float const a) - : m_alpha(a) - { this->m_num_feats = 2; } - - int - init(int const i, string const& fname) - { - scorer.open(fname); - this->m_index = i; - this->m_feature_names.push_back("lexfwd"); - this->m_feature_names.push_back("lexbwd"); - return i + this->m_num_feats; - } - - void - operator()(Bitext const& bt, PhrasePair& pp, vector * dest = NULL) const - { - if (!dest) dest = &pp.fvals; - uint32_t sid1=0,sid2=0,off1=0,off2=0,len1=0,len2=0; - parse_pid(pp.p1, sid1, off1, len1); - parse_pid(pp.p2, sid2, off2, len2); - -#if 0 - cout << len1 << " " << len2 << endl; - Token const* t1 = bt.T1->sntStart(sid1); - for (size_t i = off1; i < off1 + len1; ++i) - cout << (*bt.V1)[t1[i].id()] << " "; - cout << __FILE__ << ":" << __LINE__ << endl; - - Token const* t2 = bt.T2->sntStart(sid2); - for (size_t i = off2; i < off2 + len2; ++i) - cout << (*bt.V2)[t2[i].id()] << " "; - cout << __FILE__ << ":" << __LINE__ << endl; - - BOOST_FOREACH (int a, pp.aln) - cout << a << " " ; - cout << __FILE__ << ":" << __LINE__ << "\n" << endl; - -#endif - 
scorer.score(bt.T1->sntStart(sid1)+off1,0,len1, - bt.T2->sntStart(sid2)+off2,0,len2, - pp.aln, m_alpha, - (*dest)[this->m_index], - (*dest)[this->m_index+1]); - } - - }; - /// Word penalty template class @@ -280,7 +29,8 @@ namespace Moses { } void - operator()(Bitext const& bt, PhrasePair& pp, vector * dest = NULL) const + operator()(Bitext const& bt, PhrasePair& pp, + vector * dest = NULL) const { if (!dest) dest = &pp.fvals; uint32_t sid2=0,off2=0,len2=0; @@ -307,7 +57,8 @@ namespace Moses { } void - operator()(Bitext const& bt, PhrasePair& pp, vector * dest = NULL) const + operator()(Bitext const& bt, PhrasePair& pp, + vector * dest = NULL) const { if (!dest) dest = &pp.fvals; (*dest)[this->m_index] = 1; diff --git a/moses/TranslationModel/UG/ptable-lookup.cc b/moses/TranslationModel/UG/ptable-lookup.cc index 106505f053..2cbf89b166 100644 --- a/moses/TranslationModel/UG/ptable-lookup.cc +++ b/moses/TranslationModel/UG/ptable-lookup.cc @@ -106,15 +106,11 @@ int main(int argc, char* argv[]) cout << " "; for (size_t k = idx.first; k < idx.second; ++k) { - if (mmsapt && fname[k-idx.first].substr(0,3) == "log") - { - if(scores[k] < 0) - cout << " " << format("%10d") % round(exp(-scores[k])); - else - cout << " " << format("%10d") % round(exp(scores[k])); - } - else - cout << " " << format("%10.8f") % exp(scores[k]); + size_t j = k-idx.first; + float f = (mmsapt ? mmsapt->isLogVal(j) ? exp(scores[k]) : scores[k] + : scores[k] < 0 ? exp(scores[k]) : scores[k]); + string fmt = (mmsapt && mmsapt->isInteger(j)) ? 
"%10d" : "%10.8f"; + cout << " " << format(fmt) % f; } cout << endl; } diff --git a/moses/TranslationModel/UG/sapt_phrase_key.h b/moses/TranslationModel/UG/sapt_phrase_key.h new file mode 100644 index 0000000000..e1ecf15739 --- /dev/null +++ b/moses/TranslationModel/UG/sapt_phrase_key.h @@ -0,0 +1,13 @@ +//-*- c++ -*- +#pragma once +#include + +using namespace std; +namespace sapt +{ + using namespace Moses; + using namespace std; + + + +} diff --git a/moses/TranslationModel/UG/sapt_phrase_scorers.h b/moses/TranslationModel/UG/sapt_phrase_scorers.h new file mode 100644 index 0000000000..37cfd26fde --- /dev/null +++ b/moses/TranslationModel/UG/sapt_phrase_scorers.h @@ -0,0 +1,12 @@ +// -*- c++ -*- +// Phrase scoring functions for suffix array-based phrase tables +// written by Ulrich Germann +#pragma once +#include "sapt_pscore_unaligned.h" // count # of unaligned words +#include "sapt_pscore_provenance.h" // reward for joint phrase occ. per corpus +#include "sapt_pscore_rareness.h" // penalty for rare occurrences (global?) 
+#include "sapt_pscore_logcnt.h" // logs of observed counts +#include "sapt_pscore_lex1.h" // plain vanilla Moses lexical scores +#include "sapt_pscore_pfwd.h" // fwd phrase prob +#include "sapt_pscore_pbwd.h" // bwd phrase prob +#include "sapt_pscore_coherence.h" // coherence feature: good/sample-size diff --git a/moses/TranslationModel/UG/sapt_pscore_base.h b/moses/TranslationModel/UG/sapt_pscore_base.h new file mode 100644 index 0000000000..68a491145c --- /dev/null +++ b/moses/TranslationModel/UG/sapt_pscore_base.h @@ -0,0 +1,103 @@ +// -*- c++ -*- +// Base classes for suffix array-based phrase scorers +// written by Ulrich Germann +#pragma once +#include "moses/TranslationModel/UG/mm/ug_bitext.h" +#include "moses/TranslationModel/UG/mm/ug_phrasepair.h" +#include "util/exception.hh" +#include "boost/format.hpp" + +namespace Moses { + namespace bitext + { + + // abstract base class that defines the common API for phrase scorers + template + class + PhraseScorer + { + protected: + int m_index; + int m_num_feats; + string m_tag; + vector m_feature_names; + public: + + virtual + void + operator()(Bitext const& pt, + PhrasePair& pp, + vector * dest=NULL) + const = 0; + + void + setIndex(int const i) { m_index = i; } + + int + getIndex() const { return m_index; } + + int + fcnt() const { return m_num_feats; } + + vector const & + fnames() const { return m_feature_names; } + + string const & + fname(int i) const + { + if (i < 0) i += m_num_feats; + UTIL_THROW_IF2(i < 0 || i >= m_num_feats, + "Feature name index out of range at " << HERE); + return m_feature_names.at(i); + } + + virtual + bool + isLogVal(int i) const { return true; }; + // is this feature log valued? + + virtual + bool + isIntegerValued(int i) const { return false; }; + // is this feature integer valued (e.g., count features)? + + virtual + bool + allowPooling() const { return true; } + // does this feature function allow pooling of counts if + // there are no occurrences in the respective corpus? 
+ + }; + + // base class for 'families' of phrase scorers that have a single + template + class + SingleRealValuedParameterPhraseScorerFamily + : public PhraseScorer + { + protected: + vector m_x; + + virtual + void + init(string const specs) + { + using namespace boost; + UTIL_THROW_IF2(this->m_tag.size() == 0, + "m_tag must be initialized in constructor"); + UTIL_THROW_IF2(specs.size() == 0,"empty specification string!"); + UTIL_THROW_IF2(this->m_feature_names.size(), + "PhraseScorer can only be initialized once!"); + this->m_index = -1; + float x; char c; + for (istringstream buf(specs); buf>>x; buf>>c) + { + this->m_x.push_back(x); + string fname = (format("%s-%.2f") % this->m_tag % x).str(); + this->m_feature_names.push_back(fname); + } + this->m_num_feats = this->m_x.size(); + } + }; + } // namespace bitext +} // namespace moses diff --git a/moses/TranslationModel/UG/sapt_pscore_coherence.h b/moses/TranslationModel/UG/sapt_pscore_coherence.h new file mode 100644 index 0000000000..a3211df542 --- /dev/null +++ b/moses/TranslationModel/UG/sapt_pscore_coherence.h @@ -0,0 +1,33 @@ +// -*- c++ -*- +// written by Ulrich Germann +#pragma once +#include "moses/TranslationModel/UG/mm/ug_bitext.h" +#include "util/exception.hh" +#include "boost/format.hpp" + +namespace Moses { + namespace bitext + { + template + class + PScoreCoherence : public PhraseScorer + { + public: + PScoreCoherence(string const dummy) + { + this->m_index = -1; + this->m_num_feats = 1; + this->m_feature_names.push_back(string("coherence")); + } + + void + operator()(Bitext const& bt, + PhrasePair& pp, + vector * dest = NULL) const + { + if (!dest) dest = &pp.fvals; + (*dest)[this->m_index] = log(pp.good1) - log(pp.sample1); + } + }; + } +} diff --git a/moses/TranslationModel/UG/sapt_pscore_lex1.h b/moses/TranslationModel/UG/sapt_pscore_lex1.h new file mode 100644 index 0000000000..be994b0d38 --- /dev/null +++ b/moses/TranslationModel/UG/sapt_pscore_lex1.h @@ -0,0 +1,70 @@ +// -*- c++ -*- +// Phrase 
scorer that counts the number of unaligend words in the phrase +// written by Ulrich Germann + +#include "moses/TranslationModel/UG/mm/ug_bitext.h" +#include "sapt_pscore_base.h" +#include + +namespace Moses { + namespace bitext + { + template + class + PScoreLex1 : public PhraseScorer + { + float m_alpha; + public: + LexicalPhraseScorer2 scorer; + + PScoreLex1(string const& alpaspec, string const& lexfile) + { + this->m_index = -1; + this->m_num_feats = 2; + this->m_feature_names.reserve(2); + this->m_feature_names.push_back("lexfwd"); + this->m_feature_names.push_back("lexbwd"); + m_alpha = atof(alpaspec.c_str()); + scorer.open(lexfile); + } + + void + operator()(Bitext const& bt, + PhrasePair& pp, + vector * dest = NULL) const + { + if (!dest) dest = &pp.fvals; + // uint32_t sid1=0,sid2=0,off1=0,off2=0,len1=0,len2=0; + // parse_pid(pp.p1, sid1, off1, len1); + // parse_pid(pp.p2, sid2, off2, len2); +#if 0 + cout << len1 << " " << len2 << endl; + Token const* t1 = bt.T1->sntStart(sid1); + for (size_t i = off1; i < off1 + len1; ++i) + cout << (*bt.V1)[t1[i].id()] << " "; + cout << __FILE__ << ":" << __LINE__ << endl; + + Token const* t2 = bt.T2->sntStart(sid2); + for (size_t i = off2; i < off2 + len2; ++i) + cout << (*bt.V2)[t2[i].id()] << " "; + cout << __FILE__ << ":" << __LINE__ << endl; + + BOOST_FOREACH (int a, pp.aln) + cout << a << " " ; + cout << __FILE__ << ":" << __LINE__ << "\n" << endl; + + scorer.score(bt.T1->sntStart(sid1)+off1,0,len1, + bt.T2->sntStart(sid2)+off2,0,len2, + pp.aln, m_alpha, + (*dest)[this->m_index], + (*dest)[this->m_index+1]); +#endif + scorer.score(pp.start1,0, pp.len1, + pp.start2,0, pp.len2, pp.aln, m_alpha, + (*dest)[this->m_index], + (*dest)[this->m_index+1]); + } + }; + } //namespace bitext +} // namespace Moses + diff --git a/moses/TranslationModel/UG/sapt_pscore_logcnt.h b/moses/TranslationModel/UG/sapt_pscore_logcnt.h new file mode 100644 index 0000000000..2790323ed0 --- /dev/null +++ 
b/moses/TranslationModel/UG/sapt_pscore_logcnt.h @@ -0,0 +1,65 @@ +// -*- c++ -*- +// Phrase scorer that rewards the number of phrase pair occurrences in a bitext +// with the asymptotic function x/(j+x) where x > 0 is a function +// parameter that determines the steepness of the rewards curve +// written by Ulrich Germann + +#include "sapt_pscore_base.h" +#include + +using namespace std; +namespace Moses { + namespace bitext { + + template + class + PScoreLogCnt : public PhraseScorer + { + string m_specs; + public: + PScoreLogCnt(string const specs) + { + this->m_index = -1; + this->m_specs = specs; + if (specs.find("r1") != string::npos) // raw source phrase counts + this->m_feature_names.push_back("log-r1"); + if (specs.find("s1") != string::npos) + this->m_feature_names.push_back("log-s1"); // L1 sample size + if (specs.find("g1") != string::npos) // coherent phrases + this->m_feature_names.push_back("log-g1"); + if (specs.find("j") != string::npos) // joint counts + this->m_feature_names.push_back("log-j"); + if (specs.find("r2") != string::npos) // raw target phrase counts + this->m_feature_names.push_back("log-r2"); + this->m_num_feats = this->m_feature_names.size(); + } + + bool + isIntegerValued(int i) const { return true; } + + void + operator()(Bitext const& bt, + PhrasePair& pp, + vector * dest = NULL) const + { + if (!dest) dest = &pp.fvals; + assert(pp.raw1); + assert(pp.sample1); + assert(pp.good1); + assert(pp.joint); + assert(pp.raw2); + size_t i = this->m_index; + if (m_specs.find("r1") != string::npos) + (*dest)[i++] = log(pp.raw1); + if (m_specs.find("s1") != string::npos) + (*dest)[i++] = log(pp.sample1); + if (m_specs.find("g1") != string::npos) + (*dest)[i++] = log(pp.good1); + if (m_specs.find("j") != string::npos) + (*dest)[i++] = log(pp.joint); + if (m_specs.find("r2") != string::npos) + (*dest)[++i] = log(pp.raw2); + } + }; + } // namespace bitext +} // namespace Moses diff --git a/moses/TranslationModel/UG/sapt_pscore_pbwd.h 
b/moses/TranslationModel/UG/sapt_pscore_pbwd.h new file mode 100644 index 0000000000..f7b4686d7c --- /dev/null +++ b/moses/TranslationModel/UG/sapt_pscore_pbwd.h @@ -0,0 +1,58 @@ +//-*- c++ -*- +// written by Ulrich Germann +#pragma once +#include "moses/TranslationModel/UG/mm/ug_bitext.h" +#include "util/exception.hh" +#include "boost/format.hpp" +#include "boost/foreach.hpp" + +namespace Moses { + namespace bitext + { + template + class + PScorePbwd : public PhraseScorer + { + float conf; + string denom; + + public: + PScorePbwd(float const c, string d) + { + this->m_index = -1; + conf = c; + denom = d; + size_t checksum = d.size(); + BOOST_FOREACH(char const& x, denom) + { + if (x == '+') { --checksum; continue; } + if (x != 'g' && x != 's' && x != 'r') continue; + string s = (format("pbwd-%c%.3f") % x % c).str(); + this->m_feature_names.push_back(s); + } + this->m_num_feats = this->m_feature_names.size(); + UTIL_THROW_IF2(this->m_feature_names.size() != checksum, + "Unknown parameter in specification '" + << d << "' for Pbwd phrase scorer at " << HERE); + } + + void + operator()(Bitext const& bt, + PhrasePair& pp, + vector * dest = NULL) const + { + if (!dest) dest = &pp.fvals; + // we use the denominator specification to scale the raw counts on the + // target side; the clean way would be to counter-sample + size_t i = this->m_index; + BOOST_FOREACH(char const& x, denom) + { + uint32_t m2 = pp.raw2; + if (x == 'g') m2 = round(m2 * float(pp.good1) / pp.raw1); + else if (x == 's') m2 = round(m2 * float(pp.sample1) / pp.raw1); + (*dest)[i++] = log(lbop(max(m2, pp.joint),pp.joint,conf)); + } + } + }; + } // namespace bitext +} // namespace Moses diff --git a/moses/TranslationModel/UG/sapt_pscore_pfwd.h b/moses/TranslationModel/UG/sapt_pscore_pfwd.h new file mode 100644 index 0000000000..ed48a93d24 --- /dev/null +++ b/moses/TranslationModel/UG/sapt_pscore_pfwd.h @@ -0,0 +1,70 @@ +// -*- c++ -*- +// written by Ulrich Germann +#pragma once +#include 
"moses/TranslationModel/UG/mm/ug_bitext.h" +#include "util/exception.hh" +#include "boost/format.hpp" +#include "boost/foreach.hpp" + +namespace Moses { + namespace bitext + { + template + class + PScorePfwd : public PhraseScorer + { + float conf; + string denom; + + public: + + PScorePfwd(float const c, string d) + { + this->m_index = -1; + conf = c; + denom = d; + size_t checksum = d.size(); + BOOST_FOREACH(char const& x, denom) + { + if (x == '+') { --checksum; continue; } + if (x != 'g' && x != 's' && x != 'r') continue; + string s = (format("pfwd-%c%.3f") % x % c).str(); + this->m_feature_names.push_back(s); + } + this->m_num_feats = this->m_feature_names.size(); + UTIL_THROW_IF2(this->m_feature_names.size() != checksum, + "Unknown parameter in specification '" + << d << "' for Pfwd phrase scorer at " << HERE); + } + + void + operator()(Bitext const& bt, PhrasePair & pp, + vector * dest = NULL) const + { + if (!dest) dest = &pp.fvals; + if (pp.joint > pp.good1) + { + pp.joint = pp.good1; + // cerr<m_index; + BOOST_FOREACH(char const& c, this->denom) + { + switch (c) + { + case 'g': + (*dest)[i++] = log(lbop(pp.good1, pp.joint, conf)); + break; + case 's': + (*dest)[i++] = log(lbop(pp.sample1, pp.joint, conf)); + break; + case 'r': + (*dest)[i++] = log(lbop(pp.raw1, pp.joint, conf)); + } + } + } + }; + } +} + diff --git a/moses/TranslationModel/UG/sapt_pscore_provenance.h b/moses/TranslationModel/UG/sapt_pscore_provenance.h new file mode 100644 index 0000000000..c33b98fe79 --- /dev/null +++ b/moses/TranslationModel/UG/sapt_pscore_provenance.h @@ -0,0 +1,47 @@ +// -*- c++ -*- +// Phrase scorer that rewards the number of phrase pair occurrences in a bitext +// with the asymptotic function j/(j+x) where x > 0 is a function +// parameter that determines the steepness of the rewards curve +// written by Ulrich Germann + +#include "sapt_pscore_base.h" +#include + +using namespace std; +namespace Moses { + namespace bitext { + + // asymptotic provenance feature 
n/(n+x) + template + class + PScoreProvenance : public SingleRealValuedParameterPhraseScorerFamily + { + public: + + PScoreProvenance(string const& spec) + { + this->m_tag = "prov"; + this->init(spec); + } + + bool + isLogVal(int i) const { return false; } + + void + operator()(Bitext const& bt, + PhrasePair& pp, + vector * dest = NULL) const + { + if (!dest) dest = &pp.fvals; + size_t i = this->m_index; + BOOST_FOREACH(float const x, this->m_x) + (*dest).at(i++) = pp.joint/(x + pp.joint); + } + + bool + allowPooling() const + { return false; } + + }; + } // namespace bitext +} // namespace Moses diff --git a/moses/TranslationModel/UG/sapt_pscore_rareness.h b/moses/TranslationModel/UG/sapt_pscore_rareness.h new file mode 100644 index 0000000000..58f204c88b --- /dev/null +++ b/moses/TranslationModel/UG/sapt_pscore_rareness.h @@ -0,0 +1,41 @@ +// -*- c++ -*- +// Phrase scorer that rewards the number of phrase pair occurrences in a bitext +// with the asymptotic function x/(j+x) where x > 0 is a function +// parameter that determines the steepness of the rewards curve +// written by Ulrich Germann + +#include "sapt_pscore_base.h" +#include + +using namespace std; +namespace Moses { + namespace bitext { + + // rareness penalty: x/(n+x) + template + class + PScoreRareness : public SingleRealValuedParameterPhraseScorerFamily + { + public: + PScoreRareness(string const spec) + { + this->m_tag = "rare"; + this->init(spec); + } + + bool + isLogVal(int i) const { return false; } + + void + operator()(Bitext const& bt, + PhrasePair& pp, + vector * dest = NULL) const + { + if (!dest) dest = &pp.fvals; + size_t i = this->m_index; + BOOST_FOREACH(float const x, this->m_x) + (*dest).at(i++) = x/(x + pp.joint); + } + }; + } // namespace bitext +} // namespace Moses diff --git a/moses/TranslationModel/UG/sapt_pscore_unaligned.h b/moses/TranslationModel/UG/sapt_pscore_unaligned.h new file mode 100644 index 0000000000..bdd2919b4f --- /dev/null +++ 
b/moses/TranslationModel/UG/sapt_pscore_unaligned.h @@ -0,0 +1,67 @@ +// -*- c++ -*- +// Phrase scorer that counts the number of unaligend words in the phrase +// written by Ulrich Germann + +#include "sapt_pscore_base.h" +#include + +namespace Moses { + namespace bitext + { + template + class + PScoreUnaligned : public PhraseScorer + { + typedef boost::dynamic_bitset bitvector; + public: + PScoreUnaligned(string const spec) + { + this->m_index = -1; + int f = this->m_num_feats = atoi(spec.c_str()); + UTIL_THROW_IF2(f != 1 && f != 2,"unal parameter must be 1 or 2 at "<m_feature_names.resize(f); + if (f == 1) + this->m_feature_names[0] = "unal"; + else + { + this->m_feature_names[0] = "unal-s"; + this->m_feature_names[1] = "unal-t"; + } + } + + bool + isLogVal(int i) const { return false; } + + bool + isIntegerValued(int i) const { return true; } + + void + operator()(Bitext const& bt, + PhrasePair& pp, + vector * dest = NULL) const + { + if (!dest) dest = &pp.fvals; + // uint32_t sid1=0,sid2=0,off1=0,off2=0,len1=0,len2=0; + // parse_pid(pp.p1, sid1, off1, len1); + // parse_pid(pp.p2, sid2, off2, len2); + bitvector check1(pp.len1),check2(pp.len2); + for (size_t i = 0; i < pp.aln.size(); ) + { + check1.set(pp.aln[i++]); + check2.set(pp.aln.at(i++)); + } + + if (this->m_num_feats == 1) + { + (*dest)[this->m_index] = pp.len1 - check1.count(); + (*dest)[this->m_index] += pp.len2 - check2.count(); + } + else + { + (*dest)[this->m_index] = pp.len1 - check1.count(); + (*dest)[this->m_index+1] = pp.len2 - check2.count(); + } + } + }; + } // namespace bitext +} // namespace Moses diff --git a/moses/TranslationModel/UG/sim-pe.cc b/moses/TranslationModel/UG/sim-pe.cc new file mode 100644 index 0000000000..58a70cab42 --- /dev/null +++ b/moses/TranslationModel/UG/sim-pe.cc @@ -0,0 +1,83 @@ +#include "mmsapt.h" +#include "moses/Manager.h" +#include "moses/TranslationModel/PhraseDictionaryTreeAdaptor.h" +#include +#include +#include +#include +#include +#include + +using namespace 
Moses; +using namespace bitext; +using namespace std; +using namespace boost; + +vector fo(1,FactorType(0)); + +ostream& +operator<<(ostream& out, Hypothesis const* x) +{ + vector H; + for (const Hypothesis* h = x; h; h = h->GetPrevHypo()) + H.push_back(h); + for (; H.size(); H.pop_back()) + { + Phrase const& p = H.back()->GetCurrTargetPhrase(); + for (size_t pos = 0 ; pos < p.GetSize() ; pos++) + out << *p.GetFactor(pos, 0) << (H.size() ? " " : ""); + } + return out; +} + +vector ifo; +size_t lineNumber; + +string +translate(string const& source) +{ + StaticData const& global = StaticData::Instance(); + + Sentence sentence; + istringstream ibuf(source+"\n"); + sentence.Read(ibuf,ifo); + + Manager manager(lineNumber, sentence, global.GetSearchAlgorithm()); + manager.ProcessSentence(); + + ostringstream obuf; + const Hypothesis* h = manager.GetBestHypothesis(); + obuf << h; + return obuf.str(); + +} + +int main(int argc, char* argv[]) +{ + Parameter params; + if (!params.LoadParam(argc,argv) || !StaticData::LoadDataStatic(¶ms, argv[0])) + exit(1); + + StaticData const& global = StaticData::Instance(); + global.SetVerboseLevel(0); + ifo = global.GetInputFactorOrder(); + + lineNumber = 0; // TODO: Include sentence request number here? 
+ string source, target, alignment; + while (getline(cin,source)) + { + getline(cin,target); + getline(cin,alignment); + cout << "[S] " << source << endl; + cout << "[H] " << translate(source) << endl; + cout << "[T] " << target << endl; + Mmsapt* pdsa = reinterpret_cast(PhraseDictionary::GetColl()[0]); + pdsa->add(source,target,alignment); + cout << "[X] " << translate(source) << endl; + cout << endl; + } + exit(0); +} + + + diff --git a/moses/TranslationModel/UG/try-align.cc b/moses/TranslationModel/UG/try-align.cc index 30c87ccab8..483ad2c34a 100644 --- a/moses/TranslationModel/UG/try-align.cc +++ b/moses/TranslationModel/UG/try-align.cc @@ -2,32 +2,33 @@ using namespace std; using namespace Moses; +// currently broken Mmsapt* PT; int main(int argc, char* argv[]) { - string base = argv[1]; - string L1 = argv[2]; - string L2 = argv[3]; - ostringstream buf; - buf << "Mmsapt name=PT0 output-factor=0 num-features=5 base=" - << base << " L1=" << L1 << " L2=" << L2; - string configline = buf.str(); - PT = new Mmsapt(configline); - PT->Load(); - float w[] = { 0.0582634, 0.0518865, 0.0229819, 0.00640856, 0.647506 }; - vector weights(w,w+5); - PT->setWeights(weights); - // these values are taken from a moses.ini file; - // is there a convenient way of accessing them from within mmsapt ??? 
- string eline,fline; - // TokenIndex V; V.open("crp/trn/mm/de.tdx"); - while (getline(cin,eline) && getline(cin,fline)) - { - cout << eline << endl; - cout << fline << endl; - PT->align(eline,fline); - } - delete PT; + // string base = argv[1]; + // string L1 = argv[2]; + // string L2 = argv[3]; + // ostringstream buf; + // buf << "Mmsapt name=PT0 output-factor=0 num-features=5 base=" + // << base << " L1=" << L1 << " L2=" << L2; + // string configline = buf.str(); + // PT = new Mmsapt(configline); + // PT->Load(); + // float w[] = { 0.0582634, 0.0518865, 0.0229819, 0.00640856, 0.647506 }; + // vector weights(w,w+5); + // PT->setWeights(weights); + // // these values are taken from a moses.ini file; + // // is there a convenient way of accessing them from within mmsapt ??? + // string eline,fline; + // // TokenIndex V; V.open("crp/trn/mm/de.tdx"); + // while (getline(cin,eline) && getline(cin,fline)) + // { + // cout << eline << endl; + // cout << fline << endl; + // PT->align(eline,fline); + // } + // delete PT; } diff --git a/moses/TranslationModel/fuzzy-match/FuzzyMatchWrapper.cpp b/moses/TranslationModel/fuzzy-match/FuzzyMatchWrapper.cpp index 8766743b35..a91c583432 100644 --- a/moses/TranslationModel/fuzzy-match/FuzzyMatchWrapper.cpp +++ b/moses/TranslationModel/fuzzy-match/FuzzyMatchWrapper.cpp @@ -345,10 +345,10 @@ string FuzzyMatchWrapper::ExtractTM(WordIndex &wordIndex, long translationId, co // find the best matches according to letter sed string best_path = ""; int best_match = -1; - int best_letter_cost; + unsigned int best_letter_cost; if (lsed_flag) { best_letter_cost = compute_length( input[sentenceInd] ) * min_match / 100 + 1; - for(int si=0; si= level) { TRACE_ERR(str); } } #define IFVERBOSE(level) if (StaticData::Instance().GetVerboseLevel() >= level) +#define XVERBOSE(level,str) { if (StaticData::Instance().GetVerboseLevel() >= level) { TRACE_ERR("[" << __FILE__ << ":" << __LINE__ << "] ");TRACE_ERR(str); } } +#define HERE __FILE__ << ":" << 
__LINE__ + #if __GNUC__ == 4 && __GNUC_MINOR__ == 8 && (__GNUC_PATCHLEVEL__ == 1 || __GNUC_PATCHLEVEL__ == 2) // gcc nth_element() bug diff --git a/scripts/server/moses.py b/scripts/server/moses.py index 155458b9b4..a176c473ab 100644 --- a/scripts/server/moses.py +++ b/scripts/server/moses.py @@ -152,7 +152,7 @@ def find_free_port(p): class MosesServer(ProcessWrapper): - def __init__(self,args=["-fd", "\n"]): + def __init__(self,args=[]): self.process = None mserver_cmd = moses_root+"/bin/mosesserver" self.cmd = [mserver_cmd] + args @@ -175,7 +175,10 @@ def start(self,config=None,args=[],port=7447,debug=False): self.cmd.extend(["--server-port", "%d"%self.port]) if debug: print >>sys.stderr,self.cmd - self.process = Popen(self.cmd,stderr = sys.stderr) + # self.stderr = open("mserver.%d.stderr"%self.port,'w') + # self.stdout = open("mserver.%d.stdout"%self.port,'w') + # self.process = Popen(self.cmd,stderr = self.stderr,stdout = self.stdout) + self.process = Popen(self.cmd) else: devnull = open(os.devnull,"w") self.process = Popen(self.cmd, stderr=devnull, stdout=devnull) @@ -216,10 +219,13 @@ def translate(self,input): elif type(input) is list: return [self.translate(x) for x in input] + elif type(input) is dict: return self.proxy.translate(input) + else: raise Exception("Can't handle input of this type!") + except: attempts += 1 print >>sys.stderr, "WAITING", attempts diff --git a/scripts/server/sim-pe.py b/scripts/server/sim-pe.py index 340695a568..52d1e314a9 100755 --- a/scripts/server/sim-pe.py +++ b/scripts/server/sim-pe.py @@ -127,13 +127,40 @@ def translate(proxy, args, line): param['nbest-distinct'] = True pass attempts = 0 - while attempts < 120: + while attempts < 20: + t1 = time.time() try: - return proxy.translate(param) - except: - print >>sys.stderr, "Waiting", proxy - attempts += 1 + return proxy.translate(param) + + # except xmlrpclib.Fault as e: + # except xmlrpclib.ProtocolError as e: + # except xmlrpclib.ResponseError as e: + except 
xmlrpclib.Error as e: + time.sleep(2) # give all the stderr stuff a chance to be flushed + print >>sys.stderr," XMLRPC error:",e + print >>sys.stderr, "Input was" + print >>sys.stderr, param + sys.exit(1) + + except IOError as e: + print >>sys.stderr,"I/O error({0}): {1}".format(e.errno, e.strerror) time.sleep(5) + + except: + serverstatus = mserver.process.poll() + if serverstatus == None: + print >>sys.stderr, "Connection failed after %f seconds"%(time.time()-t1) + attempts += 1 + if attempts > 10: + time.sleep(10) + else: + time.sleep(5) + pass + else: + + print >>sys.stderr, "Oopsidaisy, server exited with code %d (signal %d)"\ + %(serverstatus/256,serverstatus%256) + pass pass pass raise Exception("Exception: could not reach translation server.") @@ -210,17 +237,25 @@ def repack_result(idx,result): pass pass - if args.url: - mserver.connect(args.url) - else: - mserver.start(args=mo_args,port=args.port,debug=args.debug) - pass - ref = None aln = None if args.ref: ref = read_data(args.ref) if args.aln: aln = read_data(args.aln) + if ref and aln: + try: + mo_args.index("--serial") + except: + mo_args.append("--serial") + pass + pass + + if args.url: + mserver.connect(args.url) + else: + mserver.start(args=mo_args, port=args.port, debug=args.debug) + pass + if (args.input == "-"): line = sys.stdin.readline() idx = 0 From ef33496ea5b82cff3c07da5ed91a26e4776eb52d Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 9 Jul 2014 15:47:34 +0100 Subject: [PATCH 35/84] non-term must have only 1 factor. --- OnDiskPt/Word.cpp | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/OnDiskPt/Word.cpp b/OnDiskPt/Word.cpp index 23d29cc7a7..33bdb6cc5e 100644 --- a/OnDiskPt/Word.cpp +++ b/OnDiskPt/Word.cpp @@ -104,14 +104,20 @@ void Word::ConvertToMoses( Moses::FactorCollection &factorColl = Moses::FactorCollection::Instance(); overwrite = Moses::Word(m_isNonTerminal); - // TODO: this conversion should have been done at load time. 
- util::TokenIter tok(vocab.GetString(m_vocabId), '|'); - - for (std::vector::const_iterator t = outputFactorsVec.begin(); t != outputFactorsVec.end(); ++t, ++tok) { - UTIL_THROW_IF2(!tok, "Too few factors in \"" << vocab.GetString(m_vocabId) << "\"; was expecting " << outputFactorsVec.size()); - overwrite.SetFactor(*t, factorColl.AddFactor(*tok, m_isNonTerminal)); + if (m_isNonTerminal) { + const std::string &tok = vocab.GetString(m_vocabId); + overwrite.SetFactor(0, factorColl.AddFactor(tok, m_isNonTerminal)); + } + else { + // TODO: this conversion should have been done at load time. + util::TokenIter tok(vocab.GetString(m_vocabId), '|'); + + for (std::vector::const_iterator t = outputFactorsVec.begin(); t != outputFactorsVec.end(); ++t, ++tok) { + UTIL_THROW_IF2(!tok, "Too few factors in \"" << vocab.GetString(m_vocabId) << "\"; was expecting " << outputFactorsVec.size()); + overwrite.SetFactor(*t, factorColl.AddFactor(*tok, m_isNonTerminal)); + } + UTIL_THROW_IF2(tok, "Too many factors in \"" << vocab.GetString(m_vocabId) << "\"; was expecting " << outputFactorsVec.size()); } - UTIL_THROW_IF2(tok, "Too many factors in \"" << vocab.GetString(m_vocabId) << "\"; was expecting " << outputFactorsVec.size()); } int Word::Compare(const Word &compare) const From c9bd98fb0f7f794a6d352620da854e2d42b48328 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 9 Jul 2014 22:35:59 +0100 Subject: [PATCH 36/84] rename Evaluate() to EvaluateInIsolation() --- moses/FF/BleuScoreFeature.h | 2 +- moses/FF/ConstrainedDecoding.h | 2 +- moses/FF/ControlRecombination.h | 2 +- moses/FF/CountNonTerms.cpp | 2 +- moses/FF/CountNonTerms.h | 2 +- moses/FF/CoveredReferenceFeature.cpp | 2 +- moses/FF/CoveredReferenceFeature.h | 2 +- moses/FF/DecodeFeature.h | 2 +- moses/FF/DistortionScoreProducer.h | 2 +- moses/FF/ExternalFeature.h | 2 +- moses/FF/FeatureFunction.h | 2 +- moses/FF/GlobalLexicalModel.h | 2 +- moses/FF/GlobalLexicalModelUnlimited.h | 2 +- moses/FF/HyperParameterAsWeight.h | 2 +- 
moses/FF/InputFeature.h | 3 ++- moses/FF/InternalStructStatelessFF.cpp | 2 +- moses/FF/InternalStructStatelessFF.h | 2 +- moses/FF/LexicalReordering/LexicalReordering.h | 2 +- moses/FF/MaxSpanFreeNonTermSource.cpp | 2 +- moses/FF/MaxSpanFreeNonTermSource.h | 2 +- moses/FF/NieceTerminal.cpp | 2 +- moses/FF/NieceTerminal.h | 2 +- moses/FF/OSM-Feature/OpSequenceModel.cpp | 2 +- moses/FF/OSM-Feature/OpSequenceModel.h | 2 +- moses/FF/PhraseBoundaryFeature.h | 2 +- moses/FF/PhraseLengthFeature.cpp | 2 +- moses/FF/PhraseLengthFeature.h | 2 +- moses/FF/PhrasePairFeature.h | 2 +- moses/FF/PhrasePenalty.cpp | 2 +- moses/FF/PhrasePenalty.h | 2 +- moses/FF/ReferenceComparison.h | 2 +- moses/FF/RuleScope.cpp | 2 +- moses/FF/RuleScope.h | 2 +- moses/FF/SetSourcePhrase.cpp | 2 +- moses/FF/SetSourcePhrase.h | 2 +- moses/FF/SkeletonStatefulFF.cpp | 2 +- moses/FF/SkeletonStatefulFF.h | 2 +- moses/FF/SkeletonStatelessFF.cpp | 2 +- moses/FF/SkeletonStatelessFF.h | 2 +- moses/FF/SoftMatchingFeature.h | 2 +- moses/FF/SourceGHKMTreeInputMatchFeature.h | 2 +- moses/FF/SourceWordDeletionFeature.cpp | 2 +- moses/FF/SourceWordDeletionFeature.h | 2 +- moses/FF/SpanLength.cpp | 2 +- moses/FF/SpanLength.h | 2 +- moses/FF/SparseHieroReorderingFeature.h | 2 +- moses/FF/SyntaxRHS.cpp | 2 +- moses/FF/SyntaxRHS.h | 2 +- moses/FF/TargetBigramFeature.h | 2 +- moses/FF/TargetNgramFeature.h | 2 +- moses/FF/TargetWordInsertionFeature.cpp | 2 +- moses/FF/TargetWordInsertionFeature.h | 2 +- moses/FF/TreeStructureFeature.h | 2 +- moses/FF/UnknownWordPenaltyProducer.h | 2 +- moses/FF/WordPenaltyProducer.cpp | 2 +- moses/FF/WordPenaltyProducer.h | 2 +- moses/FF/WordTranslationFeature.h | 2 +- moses/LM/Base.cpp | 2 +- moses/LM/Base.h | 2 +- moses/ScoreComponentCollectionTest.cpp | 2 +- moses/TargetPhrase.cpp | 2 +- 61 files changed, 62 insertions(+), 61 deletions(-) diff --git a/moses/FF/BleuScoreFeature.h b/moses/FF/BleuScoreFeature.h index 99f04f5ff6..e966ed56b6 100644 --- a/moses/FF/BleuScoreFeature.h +++ 
b/moses/FF/BleuScoreFeature.h @@ -128,7 +128,7 @@ class BleuScoreFeature : public StatefulFeatureFunction , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection *estimatedFutureScore = NULL) const {} - void Evaluate(const Phrase &source + void EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const diff --git a/moses/FF/ConstrainedDecoding.h b/moses/FF/ConstrainedDecoding.h index 2db192ce87..f9c495c6f0 100644 --- a/moses/FF/ConstrainedDecoding.h +++ b/moses/FF/ConstrainedDecoding.h @@ -41,7 +41,7 @@ class ConstrainedDecoding : public StatefulFeatureFunction return true; } - void Evaluate(const Phrase &source + void EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const diff --git a/moses/FF/ControlRecombination.h b/moses/FF/ControlRecombination.h index 0100d500d4..8bfc7ce526 100644 --- a/moses/FF/ControlRecombination.h +++ b/moses/FF/ControlRecombination.h @@ -57,7 +57,7 @@ class ControlRecombination : public StatefulFeatureFunction return true; } - void Evaluate(const Phrase &source + void EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const diff --git a/moses/FF/CountNonTerms.cpp b/moses/FF/CountNonTerms.cpp index 92b79cd5d2..03c7b73156 100644 --- a/moses/FF/CountNonTerms.cpp +++ b/moses/FF/CountNonTerms.cpp @@ -16,7 +16,7 @@ CountNonTerms::CountNonTerms(const std::string &line) ReadParameters(); } -void CountNonTerms::Evaluate(const Phrase &sourcePhrase +void CountNonTerms::EvaluateInIsolation(const Phrase &sourcePhrase , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const diff --git a/moses/FF/CountNonTerms.h 
b/moses/FF/CountNonTerms.h index 1fe71745d5..754e88b4a6 100644 --- a/moses/FF/CountNonTerms.h +++ b/moses/FF/CountNonTerms.h @@ -12,7 +12,7 @@ class CountNonTerms : public StatelessFeatureFunction bool IsUseable(const FactorMask &mask) const { return true; } - void Evaluate(const Phrase &source + void EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const; diff --git a/moses/FF/CoveredReferenceFeature.cpp b/moses/FF/CoveredReferenceFeature.cpp index 25ab829f85..a38031d7e8 100644 --- a/moses/FF/CoveredReferenceFeature.cpp +++ b/moses/FF/CoveredReferenceFeature.cpp @@ -40,7 +40,7 @@ int CoveredReferenceState::Compare(const FFState& other) const // return (m_coveredRef.size() < otherState.m_coveredRef.size()) ? -1 : +1; } -void CoveredReferenceFeature::Evaluate(const Phrase &source +void CoveredReferenceFeature::EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const diff --git a/moses/FF/CoveredReferenceFeature.h b/moses/FF/CoveredReferenceFeature.h index cd2b2f9660..510490e76c 100644 --- a/moses/FF/CoveredReferenceFeature.h +++ b/moses/FF/CoveredReferenceFeature.h @@ -52,7 +52,7 @@ class CoveredReferenceFeature : public StatefulFeatureFunction return new CoveredReferenceState(); } - void Evaluate(const Phrase &source + void EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const; diff --git a/moses/FF/DecodeFeature.h b/moses/FF/DecodeFeature.h index d795983284..393f9c87a7 100644 --- a/moses/FF/DecodeFeature.h +++ b/moses/FF/DecodeFeature.h @@ -75,7 +75,7 @@ class DecodeFeature : public StatelessFeatureFunction , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection *estimatedFutureScore = NULL) const 
{} - void Evaluate(const Phrase &source + void EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const diff --git a/moses/FF/DistortionScoreProducer.h b/moses/FF/DistortionScoreProducer.h index 1bc6493e29..0551b9ae8a 100644 --- a/moses/FF/DistortionScoreProducer.h +++ b/moses/FF/DistortionScoreProducer.h @@ -47,7 +47,7 @@ class DistortionScoreProducer : public StatefulFeatureFunction , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection *estimatedFutureScore = NULL) const {} - void Evaluate(const Phrase &source + void EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const diff --git a/moses/FF/ExternalFeature.h b/moses/FF/ExternalFeature.h index 19eb45f2a1..d2eeb8cd04 100644 --- a/moses/FF/ExternalFeature.h +++ b/moses/FF/ExternalFeature.h @@ -51,7 +51,7 @@ class ExternalFeature : public StatefulFeatureFunction void SetParameter(const std::string& key, const std::string& value); - void Evaluate(const Phrase &source + void EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const diff --git a/moses/FF/FeatureFunction.h b/moses/FF/FeatureFunction.h index 18b016c8fd..edfd57c92e 100644 --- a/moses/FF/FeatureFunction.h +++ b/moses/FF/FeatureFunction.h @@ -98,7 +98,7 @@ class FeatureFunction // source phrase is the substring that the phrase table uses to look up the target phrase, // may have more factors than actually need, but not guaranteed. 
// For SCFG decoding, the source contains non-terminals, NOT the raw source from the input sentence - virtual void Evaluate(const Phrase &source + virtual void EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const = 0; diff --git a/moses/FF/GlobalLexicalModel.h b/moses/FF/GlobalLexicalModel.h index 664835df52..9418d1b152 100644 --- a/moses/FF/GlobalLexicalModel.h +++ b/moses/FF/GlobalLexicalModel.h @@ -87,7 +87,7 @@ class GlobalLexicalModel : public StatelessFeatureFunction , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection *estimatedFutureScore = NULL) const {} - void Evaluate(const Phrase &source + void EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const diff --git a/moses/FF/GlobalLexicalModelUnlimited.h b/moses/FF/GlobalLexicalModelUnlimited.h index f12df7d61f..167b80238f 100644 --- a/moses/FF/GlobalLexicalModelUnlimited.h +++ b/moses/FF/GlobalLexicalModelUnlimited.h @@ -97,7 +97,7 @@ class GlobalLexicalModelUnlimited : public StatelessFeatureFunction , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection *estimatedFutureScore = NULL) const {} - void Evaluate(const Phrase &source + void EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const diff --git a/moses/FF/HyperParameterAsWeight.h b/moses/FF/HyperParameterAsWeight.h index 9db375c0f7..5c3189b20e 100644 --- a/moses/FF/HyperParameterAsWeight.h +++ b/moses/FF/HyperParameterAsWeight.h @@ -17,7 +17,7 @@ class HyperParameterAsWeight : public StatelessFeatureFunction virtual bool IsUseable(const FactorMask &mask) const { return true; } - virtual void Evaluate(const Phrase &source + virtual void EvaluateInIsolation(const 
Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const diff --git a/moses/FF/InputFeature.h b/moses/FF/InputFeature.h index e4b1a8d991..c8ad61ffe3 100644 --- a/moses/FF/InputFeature.h +++ b/moses/FF/InputFeature.h @@ -41,11 +41,12 @@ class InputFeature : public StatelessFeatureFunction return m_numRealWordCount; } - void Evaluate(const Phrase &source + void EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const {} + void Evaluate(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase diff --git a/moses/FF/InternalStructStatelessFF.cpp b/moses/FF/InternalStructStatelessFF.cpp index 06014a1cfe..05b94b87a6 100644 --- a/moses/FF/InternalStructStatelessFF.cpp +++ b/moses/FF/InternalStructStatelessFF.cpp @@ -5,7 +5,7 @@ using namespace std; namespace Moses { -void InternalStructStatelessFF::Evaluate(const Phrase &source +void InternalStructStatelessFF::EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const diff --git a/moses/FF/InternalStructStatelessFF.h b/moses/FF/InternalStructStatelessFF.h index a0ea3f7124..d7a9a0961f 100644 --- a/moses/FF/InternalStructStatelessFF.h +++ b/moses/FF/InternalStructStatelessFF.h @@ -16,7 +16,7 @@ class InternalStructStatelessFF : public StatelessFeatureFunction bool IsUseable(const FactorMask &mask) const { return true; } - void Evaluate(const Phrase &source + void EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const; diff --git a/moses/FF/LexicalReordering/LexicalReordering.h b/moses/FF/LexicalReordering/LexicalReordering.h index 6255987a4f..66f202126e 
100644 --- a/moses/FF/LexicalReordering/LexicalReordering.h +++ b/moses/FF/LexicalReordering/LexicalReordering.h @@ -61,7 +61,7 @@ class LexicalReordering : public StatefulFeatureFunction , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection *estimatedFutureScore = NULL) const {} - void Evaluate(const Phrase &source + void EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const diff --git a/moses/FF/MaxSpanFreeNonTermSource.cpp b/moses/FF/MaxSpanFreeNonTermSource.cpp index 3951fdd270..7165e96f5b 100644 --- a/moses/FF/MaxSpanFreeNonTermSource.cpp +++ b/moses/FF/MaxSpanFreeNonTermSource.cpp @@ -27,7 +27,7 @@ MaxSpanFreeNonTermSource::MaxSpanFreeNonTermSource(const std::string &line) m_glueTargetLHS.SetFactor(0, factor); } -void MaxSpanFreeNonTermSource::Evaluate(const Phrase &source +void MaxSpanFreeNonTermSource::EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const diff --git a/moses/FF/MaxSpanFreeNonTermSource.h b/moses/FF/MaxSpanFreeNonTermSource.h index a9eec7b5e9..30f1df02cc 100644 --- a/moses/FF/MaxSpanFreeNonTermSource.h +++ b/moses/FF/MaxSpanFreeNonTermSource.h @@ -15,7 +15,7 @@ class MaxSpanFreeNonTermSource : public StatelessFeatureFunction virtual bool IsUseable(const FactorMask &mask) const { return true; } - virtual void Evaluate(const Phrase &source + virtual void EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const; diff --git a/moses/FF/NieceTerminal.cpp b/moses/FF/NieceTerminal.cpp index 88c9f86cd4..6299d9e08c 100644 --- a/moses/FF/NieceTerminal.cpp +++ b/moses/FF/NieceTerminal.cpp @@ -25,7 +25,7 @@ std::vector NieceTerminal::DefaultWeights() const return ret; } -void 
NieceTerminal::Evaluate(const Phrase &source +void NieceTerminal::EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const diff --git a/moses/FF/NieceTerminal.h b/moses/FF/NieceTerminal.h index b7b398fff4..efa471c001 100644 --- a/moses/FF/NieceTerminal.h +++ b/moses/FF/NieceTerminal.h @@ -19,7 +19,7 @@ class NieceTerminal : public StatelessFeatureFunction return true; } - void Evaluate(const Phrase &source + void EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const; diff --git a/moses/FF/OSM-Feature/OpSequenceModel.cpp b/moses/FF/OSM-Feature/OpSequenceModel.cpp index dfa380a774..ba5405729c 100644 --- a/moses/FF/OSM-Feature/OpSequenceModel.cpp +++ b/moses/FF/OSM-Feature/OpSequenceModel.cpp @@ -42,7 +42,7 @@ void OpSequenceModel::Load() -void OpSequenceModel:: Evaluate(const Phrase &source +void OpSequenceModel:: EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const diff --git a/moses/FF/OSM-Feature/OpSequenceModel.h b/moses/FF/OSM-Feature/OpSequenceModel.h index 64cab3044e..0a670cc42e 100644 --- a/moses/FF/OSM-Feature/OpSequenceModel.h +++ b/moses/FF/OSM-Feature/OpSequenceModel.h @@ -46,7 +46,7 @@ class OpSequenceModel : public StatefulFeatureFunction , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection *estimatedFutureScore = NULL) const {} - void Evaluate(const Phrase &source + void EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const; diff --git a/moses/FF/PhraseBoundaryFeature.h b/moses/FF/PhraseBoundaryFeature.h index fbafc6da99..56ccda7afe 100644 --- 
a/moses/FF/PhraseBoundaryFeature.h +++ b/moses/FF/PhraseBoundaryFeature.h @@ -60,7 +60,7 @@ class PhraseBoundaryFeature : public StatefulFeatureFunction , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection *estimatedFutureScore = NULL) const {} - void Evaluate(const Phrase &source + void EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const diff --git a/moses/FF/PhraseLengthFeature.cpp b/moses/FF/PhraseLengthFeature.cpp index 43e0d1b2de..7850c374a6 100644 --- a/moses/FF/PhraseLengthFeature.cpp +++ b/moses/FF/PhraseLengthFeature.cpp @@ -15,7 +15,7 @@ PhraseLengthFeature::PhraseLengthFeature(const std::string &line) ReadParameters(); } -void PhraseLengthFeature::Evaluate(const Phrase &source +void PhraseLengthFeature::EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const diff --git a/moses/FF/PhraseLengthFeature.h b/moses/FF/PhraseLengthFeature.h index ba835f6545..95640b12f2 100644 --- a/moses/FF/PhraseLengthFeature.h +++ b/moses/FF/PhraseLengthFeature.h @@ -41,7 +41,7 @@ class PhraseLengthFeature : public StatelessFeatureFunction , ScoreComponentCollection *estimatedFutureScore = NULL) const {} - virtual void Evaluate(const Phrase &source + virtual void EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const; diff --git a/moses/FF/PhrasePairFeature.h b/moses/FF/PhrasePairFeature.h index 7790e9035d..ce4822f2f8 100644 --- a/moses/FF/PhrasePairFeature.h +++ b/moses/FF/PhrasePairFeature.h @@ -52,7 +52,7 @@ class PhrasePairFeature: public StatelessFeatureFunction , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection *estimatedFutureScore = NULL) const {} - void Evaluate(const Phrase 
&source + void EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const diff --git a/moses/FF/PhrasePenalty.cpp b/moses/FF/PhrasePenalty.cpp index b3e493707a..ddd21e491a 100644 --- a/moses/FF/PhrasePenalty.cpp +++ b/moses/FF/PhrasePenalty.cpp @@ -10,7 +10,7 @@ PhrasePenalty::PhrasePenalty(const std::string &line) ReadParameters(); } -void PhrasePenalty::Evaluate(const Phrase &source +void PhrasePenalty::EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const diff --git a/moses/FF/PhrasePenalty.h b/moses/FF/PhrasePenalty.h index a4014abf1f..09e82db055 100644 --- a/moses/FF/PhrasePenalty.h +++ b/moses/FF/PhrasePenalty.h @@ -14,7 +14,7 @@ class PhrasePenalty : public StatelessFeatureFunction return true; } - virtual void Evaluate(const Phrase &source + virtual void EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const; diff --git a/moses/FF/ReferenceComparison.h b/moses/FF/ReferenceComparison.h index 8b0341fd61..417d38ec44 100644 --- a/moses/FF/ReferenceComparison.h +++ b/moses/FF/ReferenceComparison.h @@ -15,7 +15,7 @@ class ReferenceComparison : public StatelessFeatureFunction virtual bool IsUseable(const FactorMask &mask) const { return true; } - virtual void Evaluate(const Phrase &source + virtual void EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const diff --git a/moses/FF/RuleScope.cpp b/moses/FF/RuleScope.cpp index e949c33377..ed329c4ca8 100644 --- a/moses/FF/RuleScope.cpp +++ b/moses/FF/RuleScope.cpp @@ -16,7 +16,7 @@ bool IsAmbiguous(const Word &word, bool sourceSyntax) return 
word.IsNonTerminal() && (!sourceSyntax || word == inputDefaultNonTerminal); } -void RuleScope::Evaluate(const Phrase &source +void RuleScope::EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const diff --git a/moses/FF/RuleScope.h b/moses/FF/RuleScope.h index 4ac10c804b..53334e789f 100644 --- a/moses/FF/RuleScope.h +++ b/moses/FF/RuleScope.h @@ -14,7 +14,7 @@ class RuleScope : public StatelessFeatureFunction virtual bool IsUseable(const FactorMask &mask) const { return true; } - virtual void Evaluate(const Phrase &source + virtual void EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const; diff --git a/moses/FF/SetSourcePhrase.cpp b/moses/FF/SetSourcePhrase.cpp index 0a2eaa4cb9..f89683f28b 100644 --- a/moses/FF/SetSourcePhrase.cpp +++ b/moses/FF/SetSourcePhrase.cpp @@ -10,7 +10,7 @@ SetSourcePhrase::SetSourcePhrase(const std::string &line) ReadParameters(); } -void SetSourcePhrase::Evaluate(const Phrase &source +void SetSourcePhrase::EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const diff --git a/moses/FF/SetSourcePhrase.h b/moses/FF/SetSourcePhrase.h index 0d7ad2adea..dfc480f9e3 100644 --- a/moses/FF/SetSourcePhrase.h +++ b/moses/FF/SetSourcePhrase.h @@ -14,7 +14,7 @@ class SetSourcePhrase : public StatelessFeatureFunction virtual bool IsUseable(const FactorMask &mask) const { return true; } - virtual void Evaluate(const Phrase &source + virtual void EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const; diff --git a/moses/FF/SkeletonStatefulFF.cpp b/moses/FF/SkeletonStatefulFF.cpp index 
2dfec5fad6..85df270e2c 100644 --- a/moses/FF/SkeletonStatefulFF.cpp +++ b/moses/FF/SkeletonStatefulFF.cpp @@ -23,7 +23,7 @@ SkeletonStatefulFF::SkeletonStatefulFF(const std::string &line) ReadParameters(); } -void SkeletonStatefulFF::Evaluate(const Phrase &source +void SkeletonStatefulFF::EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const diff --git a/moses/FF/SkeletonStatefulFF.h b/moses/FF/SkeletonStatefulFF.h index 9cbe6b512c..448f1ed0e8 100644 --- a/moses/FF/SkeletonStatefulFF.h +++ b/moses/FF/SkeletonStatefulFF.h @@ -30,7 +30,7 @@ class SkeletonStatefulFF : public StatefulFeatureFunction return new SkeletonState(0); } - void Evaluate(const Phrase &source + void EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const; diff --git a/moses/FF/SkeletonStatelessFF.cpp b/moses/FF/SkeletonStatelessFF.cpp index c05e27dec8..e032063b3c 100644 --- a/moses/FF/SkeletonStatelessFF.cpp +++ b/moses/FF/SkeletonStatelessFF.cpp @@ -13,7 +13,7 @@ SkeletonStatelessFF::SkeletonStatelessFF(const std::string &line) ReadParameters(); } -void SkeletonStatelessFF::Evaluate(const Phrase &source +void SkeletonStatelessFF::EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const diff --git a/moses/FF/SkeletonStatelessFF.h b/moses/FF/SkeletonStatelessFF.h index 5adb35f6dc..9e9b4bdfde 100644 --- a/moses/FF/SkeletonStatelessFF.h +++ b/moses/FF/SkeletonStatelessFF.h @@ -15,7 +15,7 @@ class SkeletonStatelessFF : public StatelessFeatureFunction return true; } - void Evaluate(const Phrase &source + void EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , 
ScoreComponentCollection &estimatedFutureScore) const; diff --git a/moses/FF/SoftMatchingFeature.h b/moses/FF/SoftMatchingFeature.h index b823c2426e..542c9d459d 100644 --- a/moses/FF/SoftMatchingFeature.h +++ b/moses/FF/SoftMatchingFeature.h @@ -22,7 +22,7 @@ class SoftMatchingFeature : public StatelessFeatureFunction virtual void EvaluateChart(const ChartHypothesis& hypo, ScoreComponentCollection* accumulator) const; - void Evaluate(const Phrase &source + void EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const {}; diff --git a/moses/FF/SourceGHKMTreeInputMatchFeature.h b/moses/FF/SourceGHKMTreeInputMatchFeature.h index a1ddae3259..b910d54b63 100644 --- a/moses/FF/SourceGHKMTreeInputMatchFeature.h +++ b/moses/FF/SourceGHKMTreeInputMatchFeature.h @@ -17,7 +17,7 @@ class SourceGHKMTreeInputMatchFeature : public StatelessFeatureFunction void SetParameter(const std::string& key, const std::string& value); - void Evaluate(const Phrase &source + void EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const {}; diff --git a/moses/FF/SourceWordDeletionFeature.cpp b/moses/FF/SourceWordDeletionFeature.cpp index 101e405799..e5167b93be 100644 --- a/moses/FF/SourceWordDeletionFeature.cpp +++ b/moses/FF/SourceWordDeletionFeature.cpp @@ -63,7 +63,7 @@ bool SourceWordDeletionFeature::IsUseable(const FactorMask &mask) const return ret; } -void SourceWordDeletionFeature::Evaluate(const Phrase &source +void SourceWordDeletionFeature::EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const diff --git a/moses/FF/SourceWordDeletionFeature.h b/moses/FF/SourceWordDeletionFeature.h index 9b04476af1..bd1ddb2398 100644 --- 
a/moses/FF/SourceWordDeletionFeature.h +++ b/moses/FF/SourceWordDeletionFeature.h @@ -28,7 +28,7 @@ class SourceWordDeletionFeature : public StatelessFeatureFunction bool IsUseable(const FactorMask &mask) const; - void Evaluate(const Phrase &source + void EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const; diff --git a/moses/FF/SpanLength.cpp b/moses/FF/SpanLength.cpp index 6192334be7..966aa0b944 100644 --- a/moses/FF/SpanLength.cpp +++ b/moses/FF/SpanLength.cpp @@ -21,7 +21,7 @@ SpanLength::SpanLength(const std::string &line) ReadParameters(); } -void SpanLength::Evaluate(const Phrase &source +void SpanLength::EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const diff --git a/moses/FF/SpanLength.h b/moses/FF/SpanLength.h index ba2196f871..caa6878b8c 100644 --- a/moses/FF/SpanLength.h +++ b/moses/FF/SpanLength.h @@ -14,7 +14,7 @@ class SpanLength : public StatelessFeatureFunction virtual bool IsUseable(const FactorMask &mask) const { return true; } - virtual void Evaluate(const Phrase &source + virtual void EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const; diff --git a/moses/FF/SparseHieroReorderingFeature.h b/moses/FF/SparseHieroReorderingFeature.h index 5d0f5830c4..82b9890741 100644 --- a/moses/FF/SparseHieroReorderingFeature.h +++ b/moses/FF/SparseHieroReorderingFeature.h @@ -31,7 +31,7 @@ class SparseHieroReorderingFeature : public StatelessFeatureFunction void SetParameter(const std::string& key, const std::string& value); - void Evaluate(const Phrase &source + void EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , 
ScoreComponentCollection &estimatedFutureScore) const diff --git a/moses/FF/SyntaxRHS.cpp b/moses/FF/SyntaxRHS.cpp index 292eac0048..abcff2c3be 100644 --- a/moses/FF/SyntaxRHS.cpp +++ b/moses/FF/SyntaxRHS.cpp @@ -14,7 +14,7 @@ SyntaxRHS::SyntaxRHS(const std::string &line) ReadParameters(); } -void SyntaxRHS::Evaluate(const Phrase &source +void SyntaxRHS::EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const diff --git a/moses/FF/SyntaxRHS.h b/moses/FF/SyntaxRHS.h index 1f9adcb938..9a59597ba1 100644 --- a/moses/FF/SyntaxRHS.h +++ b/moses/FF/SyntaxRHS.h @@ -15,7 +15,7 @@ class SyntaxRHS : public StatelessFeatureFunction return true; } - void Evaluate(const Phrase &source + void EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const; diff --git a/moses/FF/TargetBigramFeature.h b/moses/FF/TargetBigramFeature.h index fe2500ad27..8c600ab3ab 100644 --- a/moses/FF/TargetBigramFeature.h +++ b/moses/FF/TargetBigramFeature.h @@ -54,7 +54,7 @@ class TargetBigramFeature : public StatefulFeatureFunction , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection *estimatedFutureScore = NULL) const {} - void Evaluate(const Phrase &source + void EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const diff --git a/moses/FF/TargetNgramFeature.h b/moses/FF/TargetNgramFeature.h index 8e91a08b29..7ea236d9d8 100644 --- a/moses/FF/TargetNgramFeature.h +++ b/moses/FF/TargetNgramFeature.h @@ -199,7 +199,7 @@ class TargetNgramFeature : public StatefulFeatureFunction , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection *estimatedFutureScore = NULL) const {} - void Evaluate(const Phrase &source + void 
EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const diff --git a/moses/FF/TargetWordInsertionFeature.cpp b/moses/FF/TargetWordInsertionFeature.cpp index 7bb1ae6e9e..c8db6bfe38 100644 --- a/moses/FF/TargetWordInsertionFeature.cpp +++ b/moses/FF/TargetWordInsertionFeature.cpp @@ -53,7 +53,7 @@ void TargetWordInsertionFeature::Load() m_unrestricted = false; } -void TargetWordInsertionFeature::Evaluate(const Phrase &source +void TargetWordInsertionFeature::EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const diff --git a/moses/FF/TargetWordInsertionFeature.h b/moses/FF/TargetWordInsertionFeature.h index eedde61b2f..58ea10a4bd 100644 --- a/moses/FF/TargetWordInsertionFeature.h +++ b/moses/FF/TargetWordInsertionFeature.h @@ -28,7 +28,7 @@ class TargetWordInsertionFeature : public StatelessFeatureFunction void Load(); - virtual void Evaluate(const Phrase &source + virtual void EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const; diff --git a/moses/FF/TreeStructureFeature.h b/moses/FF/TreeStructureFeature.h index 0fbf0f9ea5..f422c4a878 100644 --- a/moses/FF/TreeStructureFeature.h +++ b/moses/FF/TreeStructureFeature.h @@ -152,7 +152,7 @@ class TreeStructureFeature : public StatefulFeatureFunction return true; } - void Evaluate(const Phrase &source + void EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const {}; diff --git a/moses/FF/UnknownWordPenaltyProducer.h b/moses/FF/UnknownWordPenaltyProducer.h index 3b48f4380f..93ae6d7ec1 100644 --- a/moses/FF/UnknownWordPenaltyProducer.h +++ 
b/moses/FF/UnknownWordPenaltyProducer.h @@ -44,7 +44,7 @@ class UnknownWordPenaltyProducer : public StatelessFeatureFunction , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection *estimatedFutureScore = NULL) const {} - void Evaluate(const Phrase &source + void EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const diff --git a/moses/FF/WordPenaltyProducer.cpp b/moses/FF/WordPenaltyProducer.cpp index 6dea01b72f..1e191d0402 100644 --- a/moses/FF/WordPenaltyProducer.cpp +++ b/moses/FF/WordPenaltyProducer.cpp @@ -17,7 +17,7 @@ WordPenaltyProducer::WordPenaltyProducer(const std::string &line) s_instance = this; } -void WordPenaltyProducer::Evaluate(const Phrase &source +void WordPenaltyProducer::EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const diff --git a/moses/FF/WordPenaltyProducer.h b/moses/FF/WordPenaltyProducer.h index ffd9216771..337ae2666e 100644 --- a/moses/FF/WordPenaltyProducer.h +++ b/moses/FF/WordPenaltyProducer.h @@ -27,7 +27,7 @@ class WordPenaltyProducer : public StatelessFeatureFunction return true; } - virtual void Evaluate(const Phrase &source + virtual void EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const; diff --git a/moses/FF/WordTranslationFeature.h b/moses/FF/WordTranslationFeature.h index 072ba1d6ad..a264e2fe4b 100644 --- a/moses/FF/WordTranslationFeature.h +++ b/moses/FF/WordTranslationFeature.h @@ -60,7 +60,7 @@ class WordTranslationFeature : public StatelessFeatureFunction , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection *estimatedFutureScore = NULL) const {} - void Evaluate(const Phrase &source + void EvaluateInIsolation(const Phrase 
&source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const diff --git a/moses/LM/Base.cpp b/moses/LM/Base.cpp index f59b5e31b5..db71119d57 100644 --- a/moses/LM/Base.cpp +++ b/moses/LM/Base.cpp @@ -69,7 +69,7 @@ void LanguageModel::ReportHistoryOrder(std::ostream &out,const Phrase &phrase) c // out << "ReportHistoryOrder not implemented"; } -void LanguageModel::Evaluate(const Phrase &source +void LanguageModel::EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const diff --git a/moses/LM/Base.h b/moses/LM/Base.h index abae5de241..ef148c8b6c 100644 --- a/moses/LM/Base.h +++ b/moses/LM/Base.h @@ -87,7 +87,7 @@ class LanguageModel : public StatefulFeatureFunction virtual void IncrementalCallback(Incremental::Manager &manager) const; virtual void ReportHistoryOrder(std::ostream &out,const Phrase &phrase) const; - virtual void Evaluate(const Phrase &source + virtual void EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const; diff --git a/moses/ScoreComponentCollectionTest.cpp b/moses/ScoreComponentCollectionTest.cpp index de542d1f69..3e6fd57293 100644 --- a/moses/ScoreComponentCollectionTest.cpp +++ b/moses/ScoreComponentCollectionTest.cpp @@ -43,7 +43,7 @@ class MockStatelessFeatureFunction : public StatelessFeatureFunction , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection *estimatedFutureScore) const {} - void Evaluate(const Phrase &source + void EvaluateInIsolation(const Phrase &source , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const diff --git a/moses/TargetPhrase.cpp b/moses/TargetPhrase.cpp index d356ab2ccc..2d9d1a06ee 100644 --- 
a/moses/TargetPhrase.cpp +++ b/moses/TargetPhrase.cpp @@ -129,7 +129,7 @@ void TargetPhrase::Evaluate(const Phrase &source, const std::vector Date: Wed, 9 Jul 2014 23:06:54 +0100 Subject: [PATCH 37/84] rename Evaluate() to EvaluateWithSourceContext() --- moses/ChartTranslationOption.cpp | 2 +- moses/FF/BleuScoreFeature.h | 2 +- moses/FF/ConstrainedDecoding.h | 2 +- moses/FF/ControlRecombination.h | 2 +- moses/FF/CountNonTerms.h | 2 +- moses/FF/CoveredReferenceFeature.cpp | 2 +- moses/FF/CoveredReferenceFeature.h | 2 +- moses/FF/DecodeFeature.h | 2 +- moses/FF/DistortionScoreProducer.h | 2 +- moses/FF/ExternalFeature.h | 2 +- moses/FF/FeatureFunction.h | 2 +- moses/FF/GlobalLexicalModel.h | 2 +- moses/FF/GlobalLexicalModelUnlimited.h | 2 +- moses/FF/HyperParameterAsWeight.h | 2 +- moses/FF/InputFeature.cpp | 2 +- moses/FF/InputFeature.h | 2 +- moses/FF/InternalStructStatelessFF.cpp | 2 +- moses/FF/InternalStructStatelessFF.h | 2 +- moses/FF/LexicalReordering/LexicalReordering.h | 2 +- moses/FF/MaxSpanFreeNonTermSource.cpp | 2 +- moses/FF/MaxSpanFreeNonTermSource.h | 2 +- moses/FF/NieceTerminal.cpp | 2 +- moses/FF/NieceTerminal.h | 2 +- moses/FF/OSM-Feature/OpSequenceModel.h | 2 +- moses/FF/PhraseBoundaryFeature.h | 2 +- moses/FF/PhraseLengthFeature.h | 2 +- moses/FF/PhrasePairFeature.h | 2 +- moses/FF/PhrasePenalty.h | 2 +- moses/FF/ReferenceComparison.h | 2 +- moses/FF/RuleScope.h | 2 +- moses/FF/SetSourcePhrase.h | 2 +- moses/FF/SkeletonStatefulFF.cpp | 2 +- moses/FF/SkeletonStatefulFF.h | 2 +- moses/FF/SkeletonStatelessFF.cpp | 2 +- moses/FF/SkeletonStatelessFF.h | 2 +- moses/FF/SoftMatchingFeature.h | 2 +- moses/FF/SourceGHKMTreeInputMatchFeature.cpp | 2 +- moses/FF/SourceGHKMTreeInputMatchFeature.h | 2 +- moses/FF/SourceWordDeletionFeature.h | 2 +- moses/FF/SpanLength.cpp | 2 +- moses/FF/SpanLength.h | 2 +- moses/FF/SparseHieroReorderingFeature.h | 2 +- moses/FF/SyntaxRHS.cpp | 2 +- moses/FF/SyntaxRHS.h | 2 +- moses/FF/TargetBigramFeature.h | 2 +- 
moses/FF/TargetNgramFeature.h | 2 +- moses/FF/TargetWordInsertionFeature.h | 2 +- moses/FF/TreeStructureFeature.h | 2 +- moses/FF/UnknownWordPenaltyProducer.h | 2 +- moses/FF/WordPenaltyProducer.h | 2 +- moses/FF/WordTranslationFeature.h | 2 +- moses/LM/Base.h | 2 +- moses/ScoreComponentCollectionTest.cpp | 2 +- moses/TargetPhrase.cpp | 2 +- 54 files changed, 54 insertions(+), 54 deletions(-) diff --git a/moses/ChartTranslationOption.cpp b/moses/ChartTranslationOption.cpp index 0fece0a093..daf1f89ce5 100644 --- a/moses/ChartTranslationOption.cpp +++ b/moses/ChartTranslationOption.cpp @@ -18,7 +18,7 @@ void ChartTranslationOption::Evaluate(const InputType &input, for (size_t i = 0; i < ffs.size(); ++i) { const FeatureFunction &ff = *ffs[i]; - ff.Evaluate(input, inputPath, m_targetPhrase, &stackVec, m_scoreBreakdown); + ff.EvaluateWithSourceContext(input, inputPath, m_targetPhrase, &stackVec, m_scoreBreakdown); } } diff --git a/moses/FF/BleuScoreFeature.h b/moses/FF/BleuScoreFeature.h index e966ed56b6..cb974da207 100644 --- a/moses/FF/BleuScoreFeature.h +++ b/moses/FF/BleuScoreFeature.h @@ -121,7 +121,7 @@ class BleuScoreFeature : public StatefulFeatureFunction FFState* EvaluateChart(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection* accumulator) const; - void Evaluate(const InputType &input + void EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec diff --git a/moses/FF/ConstrainedDecoding.h b/moses/FF/ConstrainedDecoding.h index f9c495c6f0..21d8a69c0a 100644 --- a/moses/FF/ConstrainedDecoding.h +++ b/moses/FF/ConstrainedDecoding.h @@ -47,7 +47,7 @@ class ConstrainedDecoding : public StatefulFeatureFunction , ScoreComponentCollection &estimatedFutureScore) const {} - void Evaluate(const InputType &input + void EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec diff 
--git a/moses/FF/ControlRecombination.h b/moses/FF/ControlRecombination.h index 8bfc7ce526..c35714d543 100644 --- a/moses/FF/ControlRecombination.h +++ b/moses/FF/ControlRecombination.h @@ -62,7 +62,7 @@ class ControlRecombination : public StatefulFeatureFunction , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const {} - void Evaluate(const InputType &input + void EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec diff --git a/moses/FF/CountNonTerms.h b/moses/FF/CountNonTerms.h index 754e88b4a6..1b078978c5 100644 --- a/moses/FF/CountNonTerms.h +++ b/moses/FF/CountNonTerms.h @@ -17,7 +17,7 @@ class CountNonTerms : public StatelessFeatureFunction , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const; - void Evaluate(const InputType &input + void EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec diff --git a/moses/FF/CoveredReferenceFeature.cpp b/moses/FF/CoveredReferenceFeature.cpp index a38031d7e8..81c19dc4bf 100644 --- a/moses/FF/CoveredReferenceFeature.cpp +++ b/moses/FF/CoveredReferenceFeature.cpp @@ -46,7 +46,7 @@ void CoveredReferenceFeature::EvaluateInIsolation(const Phrase &source , ScoreComponentCollection &estimatedFutureScore) const {} -void CoveredReferenceFeature::Evaluate(const InputType &input +void CoveredReferenceFeature::EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec diff --git a/moses/FF/CoveredReferenceFeature.h b/moses/FF/CoveredReferenceFeature.h index 510490e76c..b1c77d4e31 100644 --- a/moses/FF/CoveredReferenceFeature.h +++ b/moses/FF/CoveredReferenceFeature.h @@ -56,7 +56,7 @@ class CoveredReferenceFeature : public StatefulFeatureFunction , const TargetPhrase &targetPhrase , 
ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const; - void Evaluate(const InputType &input + void EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec diff --git a/moses/FF/DecodeFeature.h b/moses/FF/DecodeFeature.h index 393f9c87a7..27906fa872 100644 --- a/moses/FF/DecodeFeature.h +++ b/moses/FF/DecodeFeature.h @@ -68,7 +68,7 @@ class DecodeFeature : public StatelessFeatureFunction void EvaluateChart(const ChartHypothesis &hypo, ScoreComponentCollection* accumulator) const {} - void Evaluate(const InputType &input + void EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec diff --git a/moses/FF/DistortionScoreProducer.h b/moses/FF/DistortionScoreProducer.h index 0551b9ae8a..bc979d231b 100644 --- a/moses/FF/DistortionScoreProducer.h +++ b/moses/FF/DistortionScoreProducer.h @@ -40,7 +40,7 @@ class DistortionScoreProducer : public StatefulFeatureFunction throw std::logic_error("DistortionScoreProducer not supported in chart decoder, yet"); } - void Evaluate(const InputType &input + void EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec diff --git a/moses/FF/ExternalFeature.h b/moses/FF/ExternalFeature.h index d2eeb8cd04..b2be498d45 100644 --- a/moses/FF/ExternalFeature.h +++ b/moses/FF/ExternalFeature.h @@ -56,7 +56,7 @@ class ExternalFeature : public StatefulFeatureFunction , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const {} - void Evaluate(const InputType &input + void EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec diff --git a/moses/FF/FeatureFunction.h b/moses/FF/FeatureFunction.h index edfd57c92e..42ac129745 100644 
--- a/moses/FF/FeatureFunction.h +++ b/moses/FF/FeatureFunction.h @@ -110,7 +110,7 @@ class FeatureFunction // It is guaranteed to be in the same order as the non-terms in the source phrase. // For pb models, stackvec is NULL. // No FF should set estimatedFutureScore in both overloads! - virtual void Evaluate(const InputType &input + virtual void EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec diff --git a/moses/FF/GlobalLexicalModel.h b/moses/FF/GlobalLexicalModel.h index 9418d1b152..16963117b0 100644 --- a/moses/FF/GlobalLexicalModel.h +++ b/moses/FF/GlobalLexicalModel.h @@ -80,7 +80,7 @@ class GlobalLexicalModel : public StatelessFeatureFunction throw std::logic_error("GlobalLexicalModel not supported in chart decoder, yet"); } - void Evaluate(const InputType &input + void EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec diff --git a/moses/FF/GlobalLexicalModelUnlimited.h b/moses/FF/GlobalLexicalModelUnlimited.h index 167b80238f..cc7bd17e96 100644 --- a/moses/FF/GlobalLexicalModelUnlimited.h +++ b/moses/FF/GlobalLexicalModelUnlimited.h @@ -90,7 +90,7 @@ class GlobalLexicalModelUnlimited : public StatelessFeatureFunction throw std::logic_error("GlobalLexicalModelUnlimited not supported in chart decoder, yet"); } - void Evaluate(const InputType &input + void EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec diff --git a/moses/FF/HyperParameterAsWeight.h b/moses/FF/HyperParameterAsWeight.h index 5c3189b20e..e35e610245 100644 --- a/moses/FF/HyperParameterAsWeight.h +++ b/moses/FF/HyperParameterAsWeight.h @@ -23,7 +23,7 @@ class HyperParameterAsWeight : public StatelessFeatureFunction , ScoreComponentCollection &estimatedFutureScore) const {} - virtual void Evaluate(const InputType &input + virtual 
void EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec diff --git a/moses/FF/InputFeature.cpp b/moses/FF/InputFeature.cpp index 0fa2005d1b..61753c5951 100644 --- a/moses/FF/InputFeature.cpp +++ b/moses/FF/InputFeature.cpp @@ -44,7 +44,7 @@ void InputFeature::SetParameter(const std::string& key, const std::string& value } -void InputFeature::Evaluate(const InputType &input +void InputFeature::EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec diff --git a/moses/FF/InputFeature.h b/moses/FF/InputFeature.h index c8ad61ffe3..2c83a958c2 100644 --- a/moses/FF/InputFeature.h +++ b/moses/FF/InputFeature.h @@ -47,7 +47,7 @@ class InputFeature : public StatelessFeatureFunction , ScoreComponentCollection &estimatedFutureScore) const {} - void Evaluate(const InputType &input + void EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec diff --git a/moses/FF/InternalStructStatelessFF.cpp b/moses/FF/InternalStructStatelessFF.cpp index 05b94b87a6..a050bd8ef9 100644 --- a/moses/FF/InternalStructStatelessFF.cpp +++ b/moses/FF/InternalStructStatelessFF.cpp @@ -15,7 +15,7 @@ void InternalStructStatelessFF::EvaluateInIsolation(const Phrase &source } -void InternalStructStatelessFF::Evaluate(const InputType &input +void InternalStructStatelessFF::EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec diff --git a/moses/FF/InternalStructStatelessFF.h b/moses/FF/InternalStructStatelessFF.h index d7a9a0961f..2d1258bd76 100644 --- a/moses/FF/InternalStructStatelessFF.h +++ b/moses/FF/InternalStructStatelessFF.h @@ -21,7 +21,7 @@ class InternalStructStatelessFF : public StatelessFeatureFunction , ScoreComponentCollection &scoreBreakdown , 
ScoreComponentCollection &estimatedFutureScore) const; - void Evaluate(const InputType &input + void EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec diff --git a/moses/FF/LexicalReordering/LexicalReordering.h b/moses/FF/LexicalReordering/LexicalReordering.h index 66f202126e..1071ebfd68 100644 --- a/moses/FF/LexicalReordering/LexicalReordering.h +++ b/moses/FF/LexicalReordering/LexicalReordering.h @@ -54,7 +54,7 @@ class LexicalReordering : public StatefulFeatureFunction ScoreComponentCollection*) const { UTIL_THROW(util::Exception, "LexicalReordering is not valid for chart decoder"); } - void Evaluate(const InputType &input + void EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec diff --git a/moses/FF/MaxSpanFreeNonTermSource.cpp b/moses/FF/MaxSpanFreeNonTermSource.cpp index 7165e96f5b..9de5826358 100644 --- a/moses/FF/MaxSpanFreeNonTermSource.cpp +++ b/moses/FF/MaxSpanFreeNonTermSource.cpp @@ -35,7 +35,7 @@ void MaxSpanFreeNonTermSource::EvaluateInIsolation(const Phrase &source targetPhrase.SetRuleSource(source); } -void MaxSpanFreeNonTermSource::Evaluate(const InputType &input +void MaxSpanFreeNonTermSource::EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec diff --git a/moses/FF/MaxSpanFreeNonTermSource.h b/moses/FF/MaxSpanFreeNonTermSource.h index 30f1df02cc..f0d0e34e65 100644 --- a/moses/FF/MaxSpanFreeNonTermSource.h +++ b/moses/FF/MaxSpanFreeNonTermSource.h @@ -20,7 +20,7 @@ class MaxSpanFreeNonTermSource : public StatelessFeatureFunction , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const; - virtual void Evaluate(const InputType &input + virtual void EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const 
TargetPhrase &targetPhrase , const StackVec *stackVec diff --git a/moses/FF/NieceTerminal.cpp b/moses/FF/NieceTerminal.cpp index 6299d9e08c..c8b62ea29c 100644 --- a/moses/FF/NieceTerminal.cpp +++ b/moses/FF/NieceTerminal.cpp @@ -33,7 +33,7 @@ void NieceTerminal::EvaluateInIsolation(const Phrase &source targetPhrase.SetRuleSource(source); } -void NieceTerminal::Evaluate(const InputType &input +void NieceTerminal::EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec diff --git a/moses/FF/NieceTerminal.h b/moses/FF/NieceTerminal.h index efa471c001..0953be44fb 100644 --- a/moses/FF/NieceTerminal.h +++ b/moses/FF/NieceTerminal.h @@ -23,7 +23,7 @@ class NieceTerminal : public StatelessFeatureFunction , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const; - void Evaluate(const InputType &input + void EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec diff --git a/moses/FF/OSM-Feature/OpSequenceModel.h b/moses/FF/OSM-Feature/OpSequenceModel.h index 0a670cc42e..94fa6899de 100644 --- a/moses/FF/OSM-Feature/OpSequenceModel.h +++ b/moses/FF/OSM-Feature/OpSequenceModel.h @@ -39,7 +39,7 @@ class OpSequenceModel : public StatefulFeatureFunction int /* featureID - used to index the state in the previous hypotheses */, ScoreComponentCollection* accumulator) const; - void Evaluate(const InputType &input + void EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec diff --git a/moses/FF/PhraseBoundaryFeature.h b/moses/FF/PhraseBoundaryFeature.h index 56ccda7afe..33bf43cb0b 100644 --- a/moses/FF/PhraseBoundaryFeature.h +++ b/moses/FF/PhraseBoundaryFeature.h @@ -53,7 +53,7 @@ class PhraseBoundaryFeature : public StatefulFeatureFunction throw 
std::logic_error("PhraseBoundaryState not supported in chart decoder, yet"); } - void Evaluate(const InputType &input + void EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec diff --git a/moses/FF/PhraseLengthFeature.h b/moses/FF/PhraseLengthFeature.h index 95640b12f2..f154a2ef60 100644 --- a/moses/FF/PhraseLengthFeature.h +++ b/moses/FF/PhraseLengthFeature.h @@ -33,7 +33,7 @@ class PhraseLengthFeature : public StatelessFeatureFunction throw std::logic_error("PhraseLengthFeature not valid in chart decoder"); } - void Evaluate(const InputType &input + void EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec diff --git a/moses/FF/PhrasePairFeature.h b/moses/FF/PhrasePairFeature.h index ce4822f2f8..8d2a6a659b 100644 --- a/moses/FF/PhrasePairFeature.h +++ b/moses/FF/PhrasePairFeature.h @@ -45,7 +45,7 @@ class PhrasePairFeature: public StatelessFeatureFunction throw std::logic_error("PhrasePairFeature not valid in chart decoder"); } - void Evaluate(const InputType &input + void EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec diff --git a/moses/FF/PhrasePenalty.h b/moses/FF/PhrasePenalty.h index 09e82db055..2002f88a3e 100644 --- a/moses/FF/PhrasePenalty.h +++ b/moses/FF/PhrasePenalty.h @@ -26,7 +26,7 @@ class PhrasePenalty : public StatelessFeatureFunction ScoreComponentCollection* accumulator) const {} - void Evaluate(const InputType &input + void EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec diff --git a/moses/FF/ReferenceComparison.h b/moses/FF/ReferenceComparison.h index 417d38ec44..7235aa5884 100644 --- a/moses/FF/ReferenceComparison.h +++ b/moses/FF/ReferenceComparison.h @@ -21,7 +21,7 @@ class 
ReferenceComparison : public StatelessFeatureFunction , ScoreComponentCollection &estimatedFutureScore) const {} - virtual void Evaluate(const InputType &input + virtual void EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec diff --git a/moses/FF/RuleScope.h b/moses/FF/RuleScope.h index 53334e789f..0aa0552856 100644 --- a/moses/FF/RuleScope.h +++ b/moses/FF/RuleScope.h @@ -19,7 +19,7 @@ class RuleScope : public StatelessFeatureFunction , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const; - virtual void Evaluate(const InputType &input + virtual void EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec diff --git a/moses/FF/SetSourcePhrase.h b/moses/FF/SetSourcePhrase.h index dfc480f9e3..c78fe14fae 100644 --- a/moses/FF/SetSourcePhrase.h +++ b/moses/FF/SetSourcePhrase.h @@ -19,7 +19,7 @@ class SetSourcePhrase : public StatelessFeatureFunction , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const; - virtual void Evaluate(const InputType &input + virtual void EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec diff --git a/moses/FF/SkeletonStatefulFF.cpp b/moses/FF/SkeletonStatefulFF.cpp index 85df270e2c..3c7eb249c1 100644 --- a/moses/FF/SkeletonStatefulFF.cpp +++ b/moses/FF/SkeletonStatefulFF.cpp @@ -29,7 +29,7 @@ void SkeletonStatefulFF::EvaluateInIsolation(const Phrase &source , ScoreComponentCollection &estimatedFutureScore) const {} -void SkeletonStatefulFF::Evaluate(const InputType &input +void SkeletonStatefulFF::EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec diff --git a/moses/FF/SkeletonStatefulFF.h 
b/moses/FF/SkeletonStatefulFF.h index 448f1ed0e8..c88381cefc 100644 --- a/moses/FF/SkeletonStatefulFF.h +++ b/moses/FF/SkeletonStatefulFF.h @@ -34,7 +34,7 @@ class SkeletonStatefulFF : public StatefulFeatureFunction , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const; - void Evaluate(const InputType &input + void EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec diff --git a/moses/FF/SkeletonStatelessFF.cpp b/moses/FF/SkeletonStatelessFF.cpp index e032063b3c..6c6193372a 100644 --- a/moses/FF/SkeletonStatelessFF.cpp +++ b/moses/FF/SkeletonStatelessFF.cpp @@ -29,7 +29,7 @@ void SkeletonStatelessFF::EvaluateInIsolation(const Phrase &source } -void SkeletonStatelessFF::Evaluate(const InputType &input +void SkeletonStatelessFF::EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec diff --git a/moses/FF/SkeletonStatelessFF.h b/moses/FF/SkeletonStatelessFF.h index 9e9b4bdfde..4b88fd2136 100644 --- a/moses/FF/SkeletonStatelessFF.h +++ b/moses/FF/SkeletonStatelessFF.h @@ -19,7 +19,7 @@ class SkeletonStatelessFF : public StatelessFeatureFunction , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const; - void Evaluate(const InputType &input + void EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec diff --git a/moses/FF/SoftMatchingFeature.h b/moses/FF/SoftMatchingFeature.h index 542c9d459d..110fc87bb7 100644 --- a/moses/FF/SoftMatchingFeature.h +++ b/moses/FF/SoftMatchingFeature.h @@ -26,7 +26,7 @@ class SoftMatchingFeature : public StatelessFeatureFunction , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , 
ScoreComponentCollection &estimatedFutureScore) const {}; - void Evaluate(const InputType &input + void EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec diff --git a/moses/FF/SourceGHKMTreeInputMatchFeature.cpp b/moses/FF/SourceGHKMTreeInputMatchFeature.cpp index 0dbb3a7be1..38238b10c0 100644 --- a/moses/FF/SourceGHKMTreeInputMatchFeature.cpp +++ b/moses/FF/SourceGHKMTreeInputMatchFeature.cpp @@ -32,7 +32,7 @@ void SourceGHKMTreeInputMatchFeature::SetParameter(const std::string& key, const } // assumes that source-side syntax labels are stored in the target non-terminal field of the rules -void SourceGHKMTreeInputMatchFeature::Evaluate(const InputType &input +void SourceGHKMTreeInputMatchFeature::EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec diff --git a/moses/FF/SourceGHKMTreeInputMatchFeature.h b/moses/FF/SourceGHKMTreeInputMatchFeature.h index b910d54b63..80ce6af90e 100644 --- a/moses/FF/SourceGHKMTreeInputMatchFeature.h +++ b/moses/FF/SourceGHKMTreeInputMatchFeature.h @@ -22,7 +22,7 @@ class SourceGHKMTreeInputMatchFeature : public StatelessFeatureFunction , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const {}; - void Evaluate(const InputType &input + void EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec diff --git a/moses/FF/SourceWordDeletionFeature.h b/moses/FF/SourceWordDeletionFeature.h index bd1ddb2398..cba5ec0043 100644 --- a/moses/FF/SourceWordDeletionFeature.h +++ b/moses/FF/SourceWordDeletionFeature.h @@ -32,7 +32,7 @@ class SourceWordDeletionFeature : public StatelessFeatureFunction , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const; - void 
Evaluate(const InputType &input + void EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec diff --git a/moses/FF/SpanLength.cpp b/moses/FF/SpanLength.cpp index 966aa0b944..7a7c87be83 100644 --- a/moses/FF/SpanLength.cpp +++ b/moses/FF/SpanLength.cpp @@ -29,7 +29,7 @@ void SpanLength::EvaluateInIsolation(const Phrase &source targetPhrase.SetRuleSource(source); } -void SpanLength::Evaluate(const InputType &input +void SpanLength::EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec diff --git a/moses/FF/SpanLength.h b/moses/FF/SpanLength.h index caa6878b8c..7b3726a0eb 100644 --- a/moses/FF/SpanLength.h +++ b/moses/FF/SpanLength.h @@ -19,7 +19,7 @@ class SpanLength : public StatelessFeatureFunction , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const; - virtual void Evaluate(const InputType &input + virtual void EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec diff --git a/moses/FF/SparseHieroReorderingFeature.h b/moses/FF/SparseHieroReorderingFeature.h index 82b9890741..b0f20d7f78 100644 --- a/moses/FF/SparseHieroReorderingFeature.h +++ b/moses/FF/SparseHieroReorderingFeature.h @@ -36,7 +36,7 @@ class SparseHieroReorderingFeature : public StatelessFeatureFunction , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const {} - virtual void Evaluate(const InputType &input + virtual void EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec diff --git a/moses/FF/SyntaxRHS.cpp b/moses/FF/SyntaxRHS.cpp index abcff2c3be..e168ff4dde 100644 --- a/moses/FF/SyntaxRHS.cpp +++ b/moses/FF/SyntaxRHS.cpp @@ -21,7 +21,7 @@ void 
SyntaxRHS::EvaluateInIsolation(const Phrase &source { } -void SyntaxRHS::Evaluate(const InputType &input +void SyntaxRHS::EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec diff --git a/moses/FF/SyntaxRHS.h b/moses/FF/SyntaxRHS.h index 9a59597ba1..fedeac7246 100644 --- a/moses/FF/SyntaxRHS.h +++ b/moses/FF/SyntaxRHS.h @@ -19,7 +19,7 @@ class SyntaxRHS : public StatelessFeatureFunction , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const; - void Evaluate(const InputType &input + void EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec diff --git a/moses/FF/TargetBigramFeature.h b/moses/FF/TargetBigramFeature.h index 8c600ab3ab..6d4170a8a3 100644 --- a/moses/FF/TargetBigramFeature.h +++ b/moses/FF/TargetBigramFeature.h @@ -47,7 +47,7 @@ class TargetBigramFeature : public StatefulFeatureFunction ScoreComponentCollection* ) const { throw std::logic_error("TargetBigramFeature not valid in chart decoder"); } - void Evaluate(const InputType &input + void EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec diff --git a/moses/FF/TargetNgramFeature.h b/moses/FF/TargetNgramFeature.h index 7ea236d9d8..a1da40d32c 100644 --- a/moses/FF/TargetNgramFeature.h +++ b/moses/FF/TargetNgramFeature.h @@ -192,7 +192,7 @@ class TargetNgramFeature : public StatefulFeatureFunction virtual FFState* EvaluateChart(const ChartHypothesis& cur_hypo, int featureId, ScoreComponentCollection* accumulator) const; - void Evaluate(const InputType &input + void EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec diff --git a/moses/FF/TargetWordInsertionFeature.h 
b/moses/FF/TargetWordInsertionFeature.h index 58ea10a4bd..3a9230c9da 100644 --- a/moses/FF/TargetWordInsertionFeature.h +++ b/moses/FF/TargetWordInsertionFeature.h @@ -32,7 +32,7 @@ class TargetWordInsertionFeature : public StatelessFeatureFunction , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const; - void Evaluate(const InputType &input + void EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec diff --git a/moses/FF/TreeStructureFeature.h b/moses/FF/TreeStructureFeature.h index f422c4a878..fa915f9164 100644 --- a/moses/FF/TreeStructureFeature.h +++ b/moses/FF/TreeStructureFeature.h @@ -156,7 +156,7 @@ class TreeStructureFeature : public StatefulFeatureFunction , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const {}; - void Evaluate(const InputType &input + void EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec diff --git a/moses/FF/UnknownWordPenaltyProducer.h b/moses/FF/UnknownWordPenaltyProducer.h index 93ae6d7ec1..5a741db570 100644 --- a/moses/FF/UnknownWordPenaltyProducer.h +++ b/moses/FF/UnknownWordPenaltyProducer.h @@ -37,7 +37,7 @@ class UnknownWordPenaltyProducer : public StatelessFeatureFunction void EvaluateChart(const ChartHypothesis &hypo, ScoreComponentCollection* accumulator) const {} - void Evaluate(const InputType &input + void EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec diff --git a/moses/FF/WordPenaltyProducer.h b/moses/FF/WordPenaltyProducer.h index 337ae2666e..594610344d 100644 --- a/moses/FF/WordPenaltyProducer.h +++ b/moses/FF/WordPenaltyProducer.h @@ -37,7 +37,7 @@ class WordPenaltyProducer : public 
StatelessFeatureFunction void EvaluateChart(const ChartHypothesis &hypo, ScoreComponentCollection* accumulator) const {} - void Evaluate(const InputType &input + void EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec diff --git a/moses/FF/WordTranslationFeature.h b/moses/FF/WordTranslationFeature.h index a264e2fe4b..91dce3130b 100644 --- a/moses/FF/WordTranslationFeature.h +++ b/moses/FF/WordTranslationFeature.h @@ -53,7 +53,7 @@ class WordTranslationFeature : public StatelessFeatureFunction void EvaluateChart(const ChartHypothesis &hypo, ScoreComponentCollection* accumulator) const; - void Evaluate(const InputType &input + void EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec diff --git a/moses/LM/Base.h b/moses/LM/Base.h index ef148c8b6c..2be19e5bd4 100644 --- a/moses/LM/Base.h +++ b/moses/LM/Base.h @@ -91,7 +91,7 @@ class LanguageModel : public StatefulFeatureFunction , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const; - void Evaluate(const InputType &input + void EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec diff --git a/moses/ScoreComponentCollectionTest.cpp b/moses/ScoreComponentCollectionTest.cpp index 3e6fd57293..719e05e7d3 100644 --- a/moses/ScoreComponentCollectionTest.cpp +++ b/moses/ScoreComponentCollectionTest.cpp @@ -36,7 +36,7 @@ class MockStatelessFeatureFunction : public StatelessFeatureFunction StatelessFeatureFunction(n, line) {} void Evaluate(const Hypothesis&, ScoreComponentCollection*) const {} void EvaluateChart(const ChartHypothesis&, ScoreComponentCollection*) const {} - void Evaluate(const InputType &input + void EvaluateWithSourceContext(const InputType &input , const InputPath 
&inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec diff --git a/moses/TargetPhrase.cpp b/moses/TargetPhrase.cpp index 2d9d1a06ee..aef4f0feeb 100644 --- a/moses/TargetPhrase.cpp +++ b/moses/TargetPhrase.cpp @@ -148,7 +148,7 @@ void TargetPhrase::Evaluate(const InputType &input, const InputPath &inputPath) for (size_t i = 0; i < ffs.size(); ++i) { const FeatureFunction &ff = *ffs[i]; if (! staticData.IsFeatureFunctionIgnored( ff )) { - ff.Evaluate(input, inputPath, *this, NULL, m_scoreBreakdown, &futureScoreBreakdown); + ff.EvaluateWithSourceContext(input, inputPath, *this, NULL, m_scoreBreakdown, &futureScoreBreakdown); } } float weightedScore = m_scoreBreakdown.GetWeightedScore(); From 12a14221e283e6e1d69f14d35437f36b9a728e50 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 9 Jul 2014 23:41:08 +0100 Subject: [PATCH 38/84] rename Evaluate() to EvaluateWhenApplied() --- moses/FF/BleuScoreFeature.cpp | 2 +- moses/FF/BleuScoreFeature.h | 2 +- moses/FF/ConstrainedDecoding.cpp | 2 +- moses/FF/ConstrainedDecoding.h | 2 +- moses/FF/ControlRecombination.cpp | 2 +- moses/FF/ControlRecombination.h | 2 +- moses/FF/CountNonTerms.h | 2 +- moses/FF/CoveredReferenceFeature.cpp | 2 +- moses/FF/CoveredReferenceFeature.h | 2 +- moses/FF/DecodeFeature.h | 2 +- moses/FF/DistortionScoreProducer.cpp | 2 +- moses/FF/DistortionScoreProducer.h | 2 +- moses/FF/ExternalFeature.cpp | 2 +- moses/FF/ExternalFeature.h | 2 +- moses/FF/GlobalLexicalModel.cpp | 2 +- moses/FF/GlobalLexicalModel.h | 2 +- moses/FF/GlobalLexicalModelUnlimited.cpp | 2 +- moses/FF/GlobalLexicalModelUnlimited.h | 2 +- moses/FF/HyperParameterAsWeight.h | 2 +- moses/FF/InputFeature.h | 2 +- moses/FF/InternalStructStatelessFF.h | 2 +- moses/FF/LexicalReordering/LexicalReordering.cpp | 2 +- moses/FF/LexicalReordering/LexicalReordering.h | 2 +- moses/FF/MaxSpanFreeNonTermSource.h | 2 +- moses/FF/NieceTerminal.cpp | 2 +- moses/FF/NieceTerminal.h | 2 +- moses/FF/OSM-Feature/OpSequenceModel.cpp | 2 +- 
moses/FF/OSM-Feature/OpSequenceModel.h | 2 +- moses/FF/PhraseBoundaryFeature.cpp | 2 +- moses/FF/PhraseBoundaryFeature.h | 2 +- moses/FF/PhraseLengthFeature.h | 2 +- moses/FF/PhrasePairFeature.cpp | 2 +- moses/FF/PhrasePairFeature.h | 2 +- moses/FF/PhrasePenalty.h | 2 +- moses/FF/ReferenceComparison.h | 2 +- moses/FF/RuleScope.h | 2 +- moses/FF/SetSourcePhrase.h | 2 +- moses/FF/SkeletonStatefulFF.cpp | 2 +- moses/FF/SkeletonStatefulFF.h | 2 +- moses/FF/SkeletonStatelessFF.cpp | 2 +- moses/FF/SkeletonStatelessFF.h | 2 +- moses/FF/SoftMatchingFeature.h | 2 +- moses/FF/SourceGHKMTreeInputMatchFeature.h | 2 +- moses/FF/SourceWordDeletionFeature.h | 2 +- moses/FF/SpanLength.h | 2 +- moses/FF/SparseHieroReorderingFeature.h | 2 +- moses/FF/StatefulFeatureFunction.h | 2 +- moses/FF/StatelessFeatureFunction.h | 2 +- moses/FF/SyntaxRHS.cpp | 2 +- moses/FF/SyntaxRHS.h | 2 +- moses/FF/TargetBigramFeature.cpp | 2 +- moses/FF/TargetBigramFeature.h | 2 +- moses/FF/TargetNgramFeature.cpp | 2 +- moses/FF/TargetNgramFeature.h | 2 +- moses/FF/TargetWordInsertionFeature.h | 2 +- moses/FF/TreeStructureFeature.h | 2 +- moses/FF/UnknownWordPenaltyProducer.h | 2 +- moses/FF/WordPenaltyProducer.h | 2 +- moses/FF/WordTranslationFeature.cpp | 2 +- moses/FF/WordTranslationFeature.h | 2 +- moses/Hypothesis.cpp | 6 +++--- moses/LM/DALMWrapper.cpp | 2 +- moses/LM/DALMWrapper.h | 2 +- moses/LM/Implementation.cpp | 2 +- moses/LM/Implementation.h | 2 +- moses/LM/Ken.cpp | 2 +- moses/LM/Ken.h | 2 +- moses/ScoreComponentCollectionTest.cpp | 2 +- 68 files changed, 70 insertions(+), 70 deletions(-) diff --git a/moses/FF/BleuScoreFeature.cpp b/moses/FF/BleuScoreFeature.cpp index 348eaa0eaf..0fb1e257d8 100644 --- a/moses/FF/BleuScoreFeature.cpp +++ b/moses/FF/BleuScoreFeature.cpp @@ -502,7 +502,7 @@ void BleuScoreFeature::GetClippedNgramMatchesAndCounts(Phrase& phrase, * Given a previous state, compute Bleu score for the updated state with an additional target * phrase translated. 
*/ -FFState* BleuScoreFeature::Evaluate(const Hypothesis& cur_hypo, +FFState* BleuScoreFeature::EvaluateWhenApplied(const Hypothesis& cur_hypo, const FFState* prev_state, ScoreComponentCollection* accumulator) const { diff --git a/moses/FF/BleuScoreFeature.h b/moses/FF/BleuScoreFeature.h index cb974da207..c383648f06 100644 --- a/moses/FF/BleuScoreFeature.h +++ b/moses/FF/BleuScoreFeature.h @@ -115,7 +115,7 @@ class BleuScoreFeature : public StatefulFeatureFunction std::vector< size_t >&, size_t skip = 0) const; - FFState* Evaluate( const Hypothesis& cur_hypo, + FFState* EvaluateWhenApplied( const Hypothesis& cur_hypo, const FFState* prev_state, ScoreComponentCollection* accumulator) const; FFState* EvaluateChart(const ChartHypothesis& cur_hypo, diff --git a/moses/FF/ConstrainedDecoding.cpp b/moses/FF/ConstrainedDecoding.cpp index 9a8ecd1c3b..e0bc188ad6 100644 --- a/moses/FF/ConstrainedDecoding.cpp +++ b/moses/FF/ConstrainedDecoding.cpp @@ -100,7 +100,7 @@ const std::vector *GetConstraint(const std::map } } -FFState* ConstrainedDecoding::Evaluate( +FFState* ConstrainedDecoding::EvaluateWhenApplied( const Hypothesis& hypo, const FFState* prev_state, ScoreComponentCollection* accumulator) const diff --git a/moses/FF/ConstrainedDecoding.h b/moses/FF/ConstrainedDecoding.h index 21d8a69c0a..c7eef65223 100644 --- a/moses/FF/ConstrainedDecoding.h +++ b/moses/FF/ConstrainedDecoding.h @@ -55,7 +55,7 @@ class ConstrainedDecoding : public StatefulFeatureFunction , ScoreComponentCollection *estimatedFutureScore = NULL) const {} - FFState* Evaluate( + FFState* EvaluateWhenApplied( const Hypothesis& cur_hypo, const FFState* prev_state, ScoreComponentCollection* accumulator) const; diff --git a/moses/FF/ControlRecombination.cpp b/moses/FF/ControlRecombination.cpp index d3e7c82ab2..adc36145ea 100644 --- a/moses/FF/ControlRecombination.cpp +++ b/moses/FF/ControlRecombination.cpp @@ -56,7 +56,7 @@ std::vector ControlRecombination::DefaultWeights() const return ret; } -FFState* 
ControlRecombination::Evaluate( +FFState* ControlRecombination::EvaluateWhenApplied( const Hypothesis& hypo, const FFState* prev_state, ScoreComponentCollection* accumulator) const diff --git a/moses/FF/ControlRecombination.h b/moses/FF/ControlRecombination.h index c35714d543..f5b48027b8 100644 --- a/moses/FF/ControlRecombination.h +++ b/moses/FF/ControlRecombination.h @@ -69,7 +69,7 @@ class ControlRecombination : public StatefulFeatureFunction , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection *estimatedFutureScore = NULL) const {} - FFState* Evaluate( + FFState* EvaluateWhenApplied( const Hypothesis& cur_hypo, const FFState* prev_state, ScoreComponentCollection* accumulator) const; diff --git a/moses/FF/CountNonTerms.h b/moses/FF/CountNonTerms.h index 1b078978c5..0962da5569 100644 --- a/moses/FF/CountNonTerms.h +++ b/moses/FF/CountNonTerms.h @@ -25,7 +25,7 @@ class CountNonTerms : public StatelessFeatureFunction , ScoreComponentCollection *estimatedFutureScore = NULL) const {} - void Evaluate(const Hypothesis& hypo, + void EvaluateWhenApplied(const Hypothesis& hypo, ScoreComponentCollection* accumulator) const {} diff --git a/moses/FF/CoveredReferenceFeature.cpp b/moses/FF/CoveredReferenceFeature.cpp index 81c19dc4bf..1a43b29888 100644 --- a/moses/FF/CoveredReferenceFeature.cpp +++ b/moses/FF/CoveredReferenceFeature.cpp @@ -90,7 +90,7 @@ void CoveredReferenceFeature::SetParameter(const std::string& key, const std::st } } -FFState* CoveredReferenceFeature::Evaluate( +FFState* CoveredReferenceFeature::EvaluateWhenApplied( const Hypothesis& cur_hypo, const FFState* prev_state, ScoreComponentCollection* accumulator) const diff --git a/moses/FF/CoveredReferenceFeature.h b/moses/FF/CoveredReferenceFeature.h index b1c77d4e31..4d4275f293 100644 --- a/moses/FF/CoveredReferenceFeature.h +++ b/moses/FF/CoveredReferenceFeature.h @@ -62,7 +62,7 @@ class CoveredReferenceFeature : public StatefulFeatureFunction , const StackVec *stackVec , 
ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection *estimatedFutureScore = NULL) const; - FFState* Evaluate( + FFState* EvaluateWhenApplied( const Hypothesis& cur_hypo, const FFState* prev_state, ScoreComponentCollection* accumulator) const; diff --git a/moses/FF/DecodeFeature.h b/moses/FF/DecodeFeature.h index 27906fa872..c13af8d710 100644 --- a/moses/FF/DecodeFeature.h +++ b/moses/FF/DecodeFeature.h @@ -62,7 +62,7 @@ class DecodeFeature : public StatelessFeatureFunction bool IsUseable(const FactorMask &mask) const; void SetParameter(const std::string& key, const std::string& value); - void Evaluate(const Hypothesis& hypo, + void EvaluateWhenApplied(const Hypothesis& hypo, ScoreComponentCollection* accumulator) const {} void EvaluateChart(const ChartHypothesis &hypo, diff --git a/moses/FF/DistortionScoreProducer.cpp b/moses/FF/DistortionScoreProducer.cpp index 303f352368..5995fe2130 100644 --- a/moses/FF/DistortionScoreProducer.cpp +++ b/moses/FF/DistortionScoreProducer.cpp @@ -87,7 +87,7 @@ float DistortionScoreProducer::CalculateDistortionScore(const Hypothesis& hypo, } -FFState* DistortionScoreProducer::Evaluate( +FFState* DistortionScoreProducer::EvaluateWhenApplied( const Hypothesis& hypo, const FFState* prev_state, ScoreComponentCollection* out) const diff --git a/moses/FF/DistortionScoreProducer.h b/moses/FF/DistortionScoreProducer.h index bc979d231b..5f90c6e591 100644 --- a/moses/FF/DistortionScoreProducer.h +++ b/moses/FF/DistortionScoreProducer.h @@ -28,7 +28,7 @@ class DistortionScoreProducer : public StatefulFeatureFunction virtual const FFState* EmptyHypothesisState(const InputType &input) const; - virtual FFState* Evaluate( + virtual FFState* EvaluateWhenApplied( const Hypothesis& cur_hypo, const FFState* prev_state, ScoreComponentCollection* accumulator) const; diff --git a/moses/FF/ExternalFeature.cpp b/moses/FF/ExternalFeature.cpp index 1415411709..8fe6125441 100644 --- a/moses/FF/ExternalFeature.cpp +++ 
b/moses/FF/ExternalFeature.cpp @@ -51,7 +51,7 @@ void ExternalFeature::SetParameter(const std::string& key, const std::string& va } } -FFState* ExternalFeature::Evaluate( +FFState* ExternalFeature::EvaluateWhenApplied( const Hypothesis& cur_hypo, const FFState* prev_state, ScoreComponentCollection* accumulator) const diff --git a/moses/FF/ExternalFeature.h b/moses/FF/ExternalFeature.h index b2be498d45..f845842a5c 100644 --- a/moses/FF/ExternalFeature.h +++ b/moses/FF/ExternalFeature.h @@ -63,7 +63,7 @@ class ExternalFeature : public StatefulFeatureFunction , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection *estimatedFutureScore = NULL) const {} - FFState* Evaluate( + FFState* EvaluateWhenApplied( const Hypothesis& cur_hypo, const FFState* prev_state, ScoreComponentCollection* accumulator) const; diff --git a/moses/FF/GlobalLexicalModel.cpp b/moses/FF/GlobalLexicalModel.cpp index ff9e87bb0b..f6eb165a80 100644 --- a/moses/FF/GlobalLexicalModel.cpp +++ b/moses/FF/GlobalLexicalModel.cpp @@ -165,7 +165,7 @@ float GlobalLexicalModel::GetFromCacheOrScorePhrase( const TargetPhrase& targetP return score; } -void GlobalLexicalModel::Evaluate +void GlobalLexicalModel::EvaluateWhenApplied (const Hypothesis& hypo, ScoreComponentCollection* accumulator) const { diff --git a/moses/FF/GlobalLexicalModel.h b/moses/FF/GlobalLexicalModel.h index 16963117b0..1af2e79e1a 100644 --- a/moses/FF/GlobalLexicalModel.h +++ b/moses/FF/GlobalLexicalModel.h @@ -70,7 +70,7 @@ class GlobalLexicalModel : public StatelessFeatureFunction bool IsUseable(const FactorMask &mask) const; - void Evaluate(const Hypothesis& hypo, + void EvaluateWhenApplied(const Hypothesis& hypo, ScoreComponentCollection* accumulator) const; diff --git a/moses/FF/GlobalLexicalModelUnlimited.cpp b/moses/FF/GlobalLexicalModelUnlimited.cpp index a6883a7e85..c8dbd5883d 100644 --- a/moses/FF/GlobalLexicalModelUnlimited.cpp +++ b/moses/FF/GlobalLexicalModelUnlimited.cpp @@ -108,7 +108,7 @@ void 
GlobalLexicalModelUnlimited::InitializeForInput( Sentence const& in ) m_local->input = &in; } -void GlobalLexicalModelUnlimited::Evaluate(const Hypothesis& cur_hypo, ScoreComponentCollection* accumulator) const +void GlobalLexicalModelUnlimited::EvaluateWhenApplied(const Hypothesis& cur_hypo, ScoreComponentCollection* accumulator) const { const Sentence& input = *(m_local->input); const TargetPhrase& targetPhrase = cur_hypo.GetCurrTargetPhrase(); diff --git a/moses/FF/GlobalLexicalModelUnlimited.h b/moses/FF/GlobalLexicalModelUnlimited.h index cc7bd17e96..688cc607cb 100644 --- a/moses/FF/GlobalLexicalModelUnlimited.h +++ b/moses/FF/GlobalLexicalModelUnlimited.h @@ -81,7 +81,7 @@ class GlobalLexicalModelUnlimited : public StatelessFeatureFunction //TODO: This implements the old interface, but cannot be updated because //it appears to be stateful - void Evaluate(const Hypothesis& cur_hypo, + void EvaluateWhenApplied(const Hypothesis& cur_hypo, ScoreComponentCollection* accumulator) const; void EvaluateChart(const ChartHypothesis& /* cur_hypo */, diff --git a/moses/FF/HyperParameterAsWeight.h b/moses/FF/HyperParameterAsWeight.h index e35e610245..4ed181431f 100644 --- a/moses/FF/HyperParameterAsWeight.h +++ b/moses/FF/HyperParameterAsWeight.h @@ -31,7 +31,7 @@ class HyperParameterAsWeight : public StatelessFeatureFunction , ScoreComponentCollection *estimatedFutureScore = NULL) const {} - virtual void Evaluate(const Hypothesis& hypo, + virtual void EvaluateWhenApplied(const Hypothesis& hypo, ScoreComponentCollection* accumulator) const {} diff --git a/moses/FF/InputFeature.h b/moses/FF/InputFeature.h index 2c83a958c2..7193d90d37 100644 --- a/moses/FF/InputFeature.h +++ b/moses/FF/InputFeature.h @@ -54,7 +54,7 @@ class InputFeature : public StatelessFeatureFunction , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection *estimatedFutureScore = NULL) const; - void Evaluate(const Hypothesis& hypo, + void EvaluateWhenApplied(const Hypothesis& hypo,
ScoreComponentCollection* accumulator) const {} void EvaluateChart(const ChartHypothesis &hypo, diff --git a/moses/FF/InternalStructStatelessFF.h b/moses/FF/InternalStructStatelessFF.h index 2d1258bd76..821dfb826a 100644 --- a/moses/FF/InternalStructStatelessFF.h +++ b/moses/FF/InternalStructStatelessFF.h @@ -27,7 +27,7 @@ class InternalStructStatelessFF : public StatelessFeatureFunction , const StackVec *stackVec , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection *estimatedFutureScore = NULL) const; - virtual void Evaluate(const Hypothesis& hypo, + virtual void EvaluateWhenApplied(const Hypothesis& hypo, ScoreComponentCollection* accumulator) const {} void EvaluateChart(const ChartHypothesis &hypo, diff --git a/moses/FF/LexicalReordering/LexicalReordering.cpp b/moses/FF/LexicalReordering/LexicalReordering.cpp index d692336c9b..426a7d91cb 100644 --- a/moses/FF/LexicalReordering/LexicalReordering.cpp +++ b/moses/FF/LexicalReordering/LexicalReordering.cpp @@ -69,7 +69,7 @@ Scores LexicalReordering::GetProb(const Phrase& f, const Phrase& e) const return m_table->GetScore(f, e, Phrase(ARRAY_SIZE_INCR)); } -FFState* LexicalReordering::Evaluate(const Hypothesis& hypo, +FFState* LexicalReordering::EvaluateWhenApplied(const Hypothesis& hypo, const FFState* prev_state, ScoreComponentCollection* out) const { diff --git a/moses/FF/LexicalReordering/LexicalReordering.h b/moses/FF/LexicalReordering/LexicalReordering.h index 1071ebfd68..b6610639ea 100644 --- a/moses/FF/LexicalReordering/LexicalReordering.h +++ b/moses/FF/LexicalReordering/LexicalReordering.h @@ -45,7 +45,7 @@ class LexicalReordering : public StatefulFeatureFunction Scores GetProb(const Phrase& f, const Phrase& e) const; - virtual FFState* Evaluate(const Hypothesis& cur_hypo, + virtual FFState* EvaluateWhenApplied(const Hypothesis& cur_hypo, const FFState* prev_state, ScoreComponentCollection* accumulator) const; diff --git a/moses/FF/MaxSpanFreeNonTermSource.h 
b/moses/FF/MaxSpanFreeNonTermSource.h index f0d0e34e65..df5b98417b 100644 --- a/moses/FF/MaxSpanFreeNonTermSource.h +++ b/moses/FF/MaxSpanFreeNonTermSource.h @@ -27,7 +27,7 @@ class MaxSpanFreeNonTermSource : public StatelessFeatureFunction , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection *estimatedFutureScore = NULL) const; - virtual void Evaluate(const Hypothesis& hypo, + virtual void EvaluateWhenApplied(const Hypothesis& hypo, ScoreComponentCollection* accumulator) const {} diff --git a/moses/FF/NieceTerminal.cpp b/moses/FF/NieceTerminal.cpp index c8b62ea29c..921a95cbe3 100644 --- a/moses/FF/NieceTerminal.cpp +++ b/moses/FF/NieceTerminal.cpp @@ -71,7 +71,7 @@ void NieceTerminal::EvaluateWithSourceContext(const InputType &input } -void NieceTerminal::Evaluate(const Hypothesis& hypo, +void NieceTerminal::EvaluateWhenApplied(const Hypothesis& hypo, ScoreComponentCollection* accumulator) const {} diff --git a/moses/FF/NieceTerminal.h b/moses/FF/NieceTerminal.h index 0953be44fb..93e55d6347 100644 --- a/moses/FF/NieceTerminal.h +++ b/moses/FF/NieceTerminal.h @@ -29,7 +29,7 @@ class NieceTerminal : public StatelessFeatureFunction , const StackVec *stackVec , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection *estimatedFutureScore = NULL) const; - void Evaluate(const Hypothesis& hypo, + void EvaluateWhenApplied(const Hypothesis& hypo, ScoreComponentCollection* accumulator) const; void EvaluateChart(const ChartHypothesis &hypo, ScoreComponentCollection* accumulator) const; diff --git a/moses/FF/OSM-Feature/OpSequenceModel.cpp b/moses/FF/OSM-Feature/OpSequenceModel.cpp index ba5405729c..bc245d988b 100644 --- a/moses/FF/OSM-Feature/OpSequenceModel.cpp +++ b/moses/FF/OSM-Feature/OpSequenceModel.cpp @@ -87,7 +87,7 @@ void OpSequenceModel:: EvaluateInIsolation(const Phrase &source } -FFState* OpSequenceModel::Evaluate( +FFState* OpSequenceModel::EvaluateWhenApplied( const Hypothesis& cur_hypo, const FFState* prev_state, 
ScoreComponentCollection* accumulator) const diff --git a/moses/FF/OSM-Feature/OpSequenceModel.h b/moses/FF/OSM-Feature/OpSequenceModel.h index 94fa6899de..7dbe2e0ca0 100644 --- a/moses/FF/OSM-Feature/OpSequenceModel.h +++ b/moses/FF/OSM-Feature/OpSequenceModel.h @@ -29,7 +29,7 @@ class OpSequenceModel : public StatefulFeatureFunction void readLanguageModel(const char *); void Load(); - FFState* Evaluate( + FFState* EvaluateWhenApplied( const Hypothesis& cur_hypo, const FFState* prev_state, ScoreComponentCollection* accumulator) const; diff --git a/moses/FF/PhraseBoundaryFeature.cpp b/moses/FF/PhraseBoundaryFeature.cpp index d82181b76a..3fdcf27f9c 100644 --- a/moses/FF/PhraseBoundaryFeature.cpp +++ b/moses/FF/PhraseBoundaryFeature.cpp @@ -66,7 +66,7 @@ void PhraseBoundaryFeature::AddFeatures( } -FFState* PhraseBoundaryFeature::Evaluate +FFState* PhraseBoundaryFeature::EvaluateWhenApplied (const Hypothesis& cur_hypo, const FFState* prev_state, ScoreComponentCollection* scores) const { diff --git a/moses/FF/PhraseBoundaryFeature.h b/moses/FF/PhraseBoundaryFeature.h index 33bf43cb0b..9aec700dcb 100644 --- a/moses/FF/PhraseBoundaryFeature.h +++ b/moses/FF/PhraseBoundaryFeature.h @@ -44,7 +44,7 @@ class PhraseBoundaryFeature : public StatefulFeatureFunction virtual const FFState* EmptyHypothesisState(const InputType &) const; - virtual FFState* Evaluate(const Hypothesis& cur_hypo, const FFState* prev_state, + virtual FFState* EvaluateWhenApplied(const Hypothesis& cur_hypo, const FFState* prev_state, ScoreComponentCollection* accumulator) const; virtual FFState* EvaluateChart( const ChartHypothesis& /* cur_hypo */, diff --git a/moses/FF/PhraseLengthFeature.h b/moses/FF/PhraseLengthFeature.h index f154a2ef60..cf26d9a20b 100644 --- a/moses/FF/PhraseLengthFeature.h +++ b/moses/FF/PhraseLengthFeature.h @@ -24,7 +24,7 @@ class PhraseLengthFeature : public StatelessFeatureFunction return true; } - void Evaluate(const Hypothesis& hypo, + void EvaluateWhenApplied(const 
Hypothesis& hypo, ScoreComponentCollection* accumulator) const {} diff --git a/moses/FF/PhrasePairFeature.cpp b/moses/FF/PhrasePairFeature.cpp index 9277e19f27..f359b68f75 100644 --- a/moses/FF/PhrasePairFeature.cpp +++ b/moses/FF/PhrasePairFeature.cpp @@ -106,7 +106,7 @@ void PhrasePairFeature::Load() } } -void PhrasePairFeature::Evaluate( +void PhrasePairFeature::EvaluateWhenApplied( const Hypothesis& hypo, ScoreComponentCollection* accumulator) const { diff --git a/moses/FF/PhrasePairFeature.h b/moses/FF/PhrasePairFeature.h index 8d2a6a659b..94bf35af3e 100644 --- a/moses/FF/PhrasePairFeature.h +++ b/moses/FF/PhrasePairFeature.h @@ -37,7 +37,7 @@ class PhrasePairFeature: public StatelessFeatureFunction bool IsUseable(const FactorMask &mask) const; - void Evaluate(const Hypothesis& hypo, + void EvaluateWhenApplied(const Hypothesis& hypo, ScoreComponentCollection* accumulator) const; void EvaluateChart(const ChartHypothesis& hypo, diff --git a/moses/FF/PhrasePenalty.h b/moses/FF/PhrasePenalty.h index 2002f88a3e..2babc7d676 100644 --- a/moses/FF/PhrasePenalty.h +++ b/moses/FF/PhrasePenalty.h @@ -19,7 +19,7 @@ class PhrasePenalty : public StatelessFeatureFunction , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const; - void Evaluate(const Hypothesis& hypo, + void EvaluateWhenApplied(const Hypothesis& hypo, ScoreComponentCollection* accumulator) const {} void EvaluateChart(const ChartHypothesis &hypo, diff --git a/moses/FF/ReferenceComparison.h b/moses/FF/ReferenceComparison.h index 7235aa5884..571242ce43 100644 --- a/moses/FF/ReferenceComparison.h +++ b/moses/FF/ReferenceComparison.h @@ -29,7 +29,7 @@ class ReferenceComparison : public StatelessFeatureFunction , ScoreComponentCollection *estimatedFutureScore = NULL) const {} - virtual void Evaluate(const Hypothesis& hypo, + virtual void EvaluateWhenApplied(const Hypothesis& hypo, ScoreComponentCollection* accumulator) const {} diff --git a/moses/FF/RuleScope.h 
b/moses/FF/RuleScope.h index 0aa0552856..a051e411ad 100644 --- a/moses/FF/RuleScope.h +++ b/moses/FF/RuleScope.h @@ -27,7 +27,7 @@ class RuleScope : public StatelessFeatureFunction , ScoreComponentCollection *estimatedFutureScore = NULL) const {} - virtual void Evaluate(const Hypothesis& hypo, + virtual void EvaluateWhenApplied(const Hypothesis& hypo, ScoreComponentCollection* accumulator) const {} diff --git a/moses/FF/SetSourcePhrase.h b/moses/FF/SetSourcePhrase.h index c78fe14fae..3f5bc82868 100644 --- a/moses/FF/SetSourcePhrase.h +++ b/moses/FF/SetSourcePhrase.h @@ -27,7 +27,7 @@ class SetSourcePhrase : public StatelessFeatureFunction , ScoreComponentCollection *estimatedFutureScore = NULL) const {} - virtual void Evaluate(const Hypothesis& hypo, + virtual void EvaluateWhenApplied(const Hypothesis& hypo, ScoreComponentCollection* accumulator) const {} diff --git a/moses/FF/SkeletonStatefulFF.cpp b/moses/FF/SkeletonStatefulFF.cpp index 3c7eb249c1..0d1a0f9118 100644 --- a/moses/FF/SkeletonStatefulFF.cpp +++ b/moses/FF/SkeletonStatefulFF.cpp @@ -37,7 +37,7 @@ void SkeletonStatefulFF::EvaluateWithSourceContext(const InputType &input , ScoreComponentCollection *estimatedFutureScore) const {} -FFState* SkeletonStatefulFF::Evaluate( +FFState* SkeletonStatefulFF::EvaluateWhenApplied( const Hypothesis& cur_hypo, const FFState* prev_state, ScoreComponentCollection* accumulator) const diff --git a/moses/FF/SkeletonStatefulFF.h b/moses/FF/SkeletonStatefulFF.h index c88381cefc..fd93bce55a 100644 --- a/moses/FF/SkeletonStatefulFF.h +++ b/moses/FF/SkeletonStatefulFF.h @@ -40,7 +40,7 @@ class SkeletonStatefulFF : public StatefulFeatureFunction , const StackVec *stackVec , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection *estimatedFutureScore = NULL) const; - FFState* Evaluate( + FFState* EvaluateWhenApplied( const Hypothesis& cur_hypo, const FFState* prev_state, ScoreComponentCollection* accumulator) const; diff --git a/moses/FF/SkeletonStatelessFF.cpp 
b/moses/FF/SkeletonStatelessFF.cpp index 6c6193372a..446d57b932 100644 --- a/moses/FF/SkeletonStatelessFF.cpp +++ b/moses/FF/SkeletonStatelessFF.cpp @@ -44,7 +44,7 @@ void SkeletonStatelessFF::EvaluateWithSourceContext(const InputType &input } -void SkeletonStatelessFF::Evaluate(const Hypothesis& hypo, +void SkeletonStatelessFF::EvaluateWhenApplied(const Hypothesis& hypo, ScoreComponentCollection* accumulator) const {} diff --git a/moses/FF/SkeletonStatelessFF.h b/moses/FF/SkeletonStatelessFF.h index 4b88fd2136..5d772b398c 100644 --- a/moses/FF/SkeletonStatelessFF.h +++ b/moses/FF/SkeletonStatelessFF.h @@ -25,7 +25,7 @@ class SkeletonStatelessFF : public StatelessFeatureFunction , const StackVec *stackVec , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection *estimatedFutureScore = NULL) const; - void Evaluate(const Hypothesis& hypo, + void EvaluateWhenApplied(const Hypothesis& hypo, ScoreComponentCollection* accumulator) const; void EvaluateChart(const ChartHypothesis &hypo, ScoreComponentCollection* accumulator) const; diff --git a/moses/FF/SoftMatchingFeature.h b/moses/FF/SoftMatchingFeature.h index 110fc87bb7..37568325f1 100644 --- a/moses/FF/SoftMatchingFeature.h +++ b/moses/FF/SoftMatchingFeature.h @@ -32,7 +32,7 @@ class SoftMatchingFeature : public StatelessFeatureFunction , const StackVec *stackVec , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection *estimatedFutureScore = NULL) const {}; - void Evaluate(const Hypothesis& hypo, + void EvaluateWhenApplied(const Hypothesis& hypo, ScoreComponentCollection* accumulator) const {}; bool Load(const std::string &filePath); diff --git a/moses/FF/SourceGHKMTreeInputMatchFeature.h b/moses/FF/SourceGHKMTreeInputMatchFeature.h index 80ce6af90e..ef9155f489 100644 --- a/moses/FF/SourceGHKMTreeInputMatchFeature.h +++ b/moses/FF/SourceGHKMTreeInputMatchFeature.h @@ -29,7 +29,7 @@ class SourceGHKMTreeInputMatchFeature : public StatelessFeatureFunction , ScoreComponentCollection 
&scoreBreakdown , ScoreComponentCollection *estimatedFutureScore = NULL) const; - void Evaluate(const Hypothesis& hypo, + void EvaluateWhenApplied(const Hypothesis& hypo, ScoreComponentCollection* accumulator) const {}; void EvaluateChart(const ChartHypothesis &hypo, diff --git a/moses/FF/SourceWordDeletionFeature.h b/moses/FF/SourceWordDeletionFeature.h index cba5ec0043..a2fec0f85c 100644 --- a/moses/FF/SourceWordDeletionFeature.h +++ b/moses/FF/SourceWordDeletionFeature.h @@ -39,7 +39,7 @@ class SourceWordDeletionFeature : public StatelessFeatureFunction , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection *estimatedFutureScore = NULL) const {} - void Evaluate(const Hypothesis& hypo, + void EvaluateWhenApplied(const Hypothesis& hypo, ScoreComponentCollection* accumulator) const {} void EvaluateChart(const ChartHypothesis &hypo, diff --git a/moses/FF/SpanLength.h b/moses/FF/SpanLength.h index 7b3726a0eb..7792cc6d09 100644 --- a/moses/FF/SpanLength.h +++ b/moses/FF/SpanLength.h @@ -27,7 +27,7 @@ class SpanLength : public StatelessFeatureFunction , ScoreComponentCollection *estimatedFutureScore = NULL) const; - virtual void Evaluate(const Hypothesis& hypo, + virtual void EvaluateWhenApplied(const Hypothesis& hypo, ScoreComponentCollection* accumulator) const {} diff --git a/moses/FF/SparseHieroReorderingFeature.h b/moses/FF/SparseHieroReorderingFeature.h index b0f20d7f78..45ff1884a2 100644 --- a/moses/FF/SparseHieroReorderingFeature.h +++ b/moses/FF/SparseHieroReorderingFeature.h @@ -44,7 +44,7 @@ class SparseHieroReorderingFeature : public StatelessFeatureFunction , ScoreComponentCollection *estimatedFutureScore = NULL) const {} - virtual void Evaluate(const Hypothesis& hypo, + virtual void EvaluateWhenApplied(const Hypothesis& hypo, ScoreComponentCollection* accumulator) const {} void EvaluateChart(const ChartHypothesis &hypo, diff --git a/moses/FF/StatefulFeatureFunction.h b/moses/FF/StatefulFeatureFunction.h index 75b46d827f..bf47ec5cf0 100644 
--- a/moses/FF/StatefulFeatureFunction.h +++ b/moses/FF/StatefulFeatureFunction.h @@ -29,7 +29,7 @@ class StatefulFeatureFunction: public FeatureFunction * hypothesis, you should store it in an FFState object which will be passed * in as prev_state. If you don't do this, you will get in trouble. */ - virtual FFState* Evaluate( + virtual FFState* EvaluateWhenApplied( const Hypothesis& cur_hypo, const FFState* prev_state, ScoreComponentCollection* accumulator) const = 0; diff --git a/moses/FF/StatelessFeatureFunction.h b/moses/FF/StatelessFeatureFunction.h index fde740115b..e300ac8d32 100644 --- a/moses/FF/StatelessFeatureFunction.h +++ b/moses/FF/StatelessFeatureFunction.h @@ -23,7 +23,7 @@ class StatelessFeatureFunction: public FeatureFunction /** * This should be implemented for features that apply to phrase-based models. **/ - virtual void Evaluate(const Hypothesis& hypo, + virtual void EvaluateWhenApplied(const Hypothesis& hypo, ScoreComponentCollection* accumulator) const = 0; /** diff --git a/moses/FF/SyntaxRHS.cpp b/moses/FF/SyntaxRHS.cpp index e168ff4dde..de886cd0cb 100644 --- a/moses/FF/SyntaxRHS.cpp +++ b/moses/FF/SyntaxRHS.cpp @@ -42,7 +42,7 @@ void SyntaxRHS::EvaluateWithSourceContext(const InputType &input } -void SyntaxRHS::Evaluate(const Hypothesis& hypo, +void SyntaxRHS::EvaluateWhenApplied(const Hypothesis& hypo, ScoreComponentCollection* accumulator) const {} diff --git a/moses/FF/SyntaxRHS.h b/moses/FF/SyntaxRHS.h index fedeac7246..ed455220bd 100644 --- a/moses/FF/SyntaxRHS.h +++ b/moses/FF/SyntaxRHS.h @@ -25,7 +25,7 @@ class SyntaxRHS : public StatelessFeatureFunction , const StackVec *stackVec , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection *estimatedFutureScore = NULL) const; - void Evaluate(const Hypothesis& hypo, + void EvaluateWhenApplied(const Hypothesis& hypo, ScoreComponentCollection* accumulator) const; void EvaluateChart(const ChartHypothesis &hypo, ScoreComponentCollection* accumulator) const; diff --git 
a/moses/FF/TargetBigramFeature.cpp b/moses/FF/TargetBigramFeature.cpp index 104f986e74..f1da62b7dd 100644 --- a/moses/FF/TargetBigramFeature.cpp +++ b/moses/FF/TargetBigramFeature.cpp @@ -64,7 +64,7 @@ const FFState* TargetBigramFeature::EmptyHypothesisState(const InputType &/*inpu return new TargetBigramState(m_bos); } -FFState* TargetBigramFeature::Evaluate(const Hypothesis& cur_hypo, +FFState* TargetBigramFeature::EvaluateWhenApplied(const Hypothesis& cur_hypo, const FFState* prev_state, ScoreComponentCollection* accumulator) const { diff --git a/moses/FF/TargetBigramFeature.h b/moses/FF/TargetBigramFeature.h index 6d4170a8a3..6b26bb2695 100644 --- a/moses/FF/TargetBigramFeature.h +++ b/moses/FF/TargetBigramFeature.h @@ -39,7 +39,7 @@ class TargetBigramFeature : public StatefulFeatureFunction virtual const FFState* EmptyHypothesisState(const InputType &input) const; - virtual FFState* Evaluate(const Hypothesis& cur_hypo, const FFState* prev_state, + virtual FFState* EvaluateWhenApplied(const Hypothesis& cur_hypo, const FFState* prev_state, ScoreComponentCollection* accumulator) const; virtual FFState* EvaluateChart( const ChartHypothesis& /* cur_hypo */, diff --git a/moses/FF/TargetNgramFeature.cpp b/moses/FF/TargetNgramFeature.cpp index b0abb07a17..a2fc4e0400 100644 --- a/moses/FF/TargetNgramFeature.cpp +++ b/moses/FF/TargetNgramFeature.cpp @@ -95,7 +95,7 @@ const FFState* TargetNgramFeature::EmptyHypothesisState(const InputType &/*input return new TargetNgramState(bos); } -FFState* TargetNgramFeature::Evaluate(const Hypothesis& cur_hypo, +FFState* TargetNgramFeature::EvaluateWhenApplied(const Hypothesis& cur_hypo, const FFState* prev_state, ScoreComponentCollection* accumulator) const { diff --git a/moses/FF/TargetNgramFeature.h b/moses/FF/TargetNgramFeature.h index a1da40d32c..914538dd4b 100644 --- a/moses/FF/TargetNgramFeature.h +++ b/moses/FF/TargetNgramFeature.h @@ -186,7 +186,7 @@ class TargetNgramFeature : public StatefulFeatureFunction virtual const 
FFState* EmptyHypothesisState(const InputType &input) const; - virtual FFState* Evaluate(const Hypothesis& cur_hypo, const FFState* prev_state, + virtual FFState* EvaluateWhenApplied(const Hypothesis& cur_hypo, const FFState* prev_state, ScoreComponentCollection* accumulator) const; virtual FFState* EvaluateChart(const ChartHypothesis& cur_hypo, int featureId, diff --git a/moses/FF/TargetWordInsertionFeature.h b/moses/FF/TargetWordInsertionFeature.h index 3a9230c9da..6d48e7a982 100644 --- a/moses/FF/TargetWordInsertionFeature.h +++ b/moses/FF/TargetWordInsertionFeature.h @@ -39,7 +39,7 @@ class TargetWordInsertionFeature : public StatelessFeatureFunction , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection *estimatedFutureScore = NULL) const {} - void Evaluate(const Hypothesis& hypo, + void EvaluateWhenApplied(const Hypothesis& hypo, ScoreComponentCollection* accumulator) const {} void EvaluateChart(const ChartHypothesis &hypo, diff --git a/moses/FF/TreeStructureFeature.h b/moses/FF/TreeStructureFeature.h index fa915f9164..100e378c65 100644 --- a/moses/FF/TreeStructureFeature.h +++ b/moses/FF/TreeStructureFeature.h @@ -162,7 +162,7 @@ class TreeStructureFeature : public StatefulFeatureFunction , const StackVec *stackVec , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection *estimatedFutureScore = NULL) const {}; - FFState* Evaluate( + FFState* EvaluateWhenApplied( const Hypothesis& cur_hypo, const FFState* prev_state, ScoreComponentCollection* accumulator) const {UTIL_THROW(util::Exception, "Not implemented");}; diff --git a/moses/FF/UnknownWordPenaltyProducer.h b/moses/FF/UnknownWordPenaltyProducer.h index 5a741db570..d1ac80a026 100644 --- a/moses/FF/UnknownWordPenaltyProducer.h +++ b/moses/FF/UnknownWordPenaltyProducer.h @@ -31,7 +31,7 @@ class UnknownWordPenaltyProducer : public StatelessFeatureFunction } std::vector DefaultWeights() const; - void Evaluate(const Hypothesis& hypo, + void EvaluateWhenApplied(const Hypothesis& hypo, 
ScoreComponentCollection* accumulator) const {} void EvaluateChart(const ChartHypothesis &hypo, diff --git a/moses/FF/WordPenaltyProducer.h b/moses/FF/WordPenaltyProducer.h index 594610344d..5dc07c679c 100644 --- a/moses/FF/WordPenaltyProducer.h +++ b/moses/FF/WordPenaltyProducer.h @@ -31,7 +31,7 @@ class WordPenaltyProducer : public StatelessFeatureFunction , const TargetPhrase &targetPhrase , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &estimatedFutureScore) const; - void Evaluate(const Hypothesis& hypo, + void EvaluateWhenApplied(const Hypothesis& hypo, ScoreComponentCollection* accumulator) const {} void EvaluateChart(const ChartHypothesis &hypo, diff --git a/moses/FF/WordTranslationFeature.cpp b/moses/FF/WordTranslationFeature.cpp index 554107c326..ed88c0e7b2 100644 --- a/moses/FF/WordTranslationFeature.cpp +++ b/moses/FF/WordTranslationFeature.cpp @@ -137,7 +137,7 @@ void WordTranslationFeature::Load() } } -void WordTranslationFeature::Evaluate +void WordTranslationFeature::EvaluateWhenApplied (const Hypothesis& hypo, ScoreComponentCollection* accumulator) const { diff --git a/moses/FF/WordTranslationFeature.h b/moses/FF/WordTranslationFeature.h index 91dce3130b..9de73eaef8 100644 --- a/moses/FF/WordTranslationFeature.h +++ b/moses/FF/WordTranslationFeature.h @@ -48,7 +48,7 @@ class WordTranslationFeature : public StatelessFeatureFunction return new DummyState(); } - void Evaluate(const Hypothesis& hypo, + void EvaluateWhenApplied(const Hypothesis& hypo, ScoreComponentCollection* accumulator) const; void EvaluateChart(const ChartHypothesis &hypo, diff --git a/moses/Hypothesis.cpp b/moses/Hypothesis.cpp index 400fd0e0f7..61e7c3f714 100644 --- a/moses/Hypothesis.cpp +++ b/moses/Hypothesis.cpp @@ -211,7 +211,7 @@ void Hypothesis::EvaluateWith(const StatefulFeatureFunction &sfff, { const StaticData &staticData = StaticData::Instance(); if (! 
staticData.IsFeatureFunctionIgnored( sfff )) { - m_ffStates[state_idx] = sfff.Evaluate( + m_ffStates[state_idx] = sfff.EvaluateWhenApplied( *this, m_prevHypo ? m_prevHypo->m_ffStates[state_idx] : NULL, &m_scoreBreakdown); @@ -222,7 +222,7 @@ void Hypothesis::EvaluateWith(const StatelessFeatureFunction& slff) { const StaticData &staticData = StaticData::Instance(); if (! staticData.IsFeatureFunctionIgnored( slff )) { - slff.Evaluate(*this, &m_scoreBreakdown); + slff.EvaluateWhenApplied(*this, &m_scoreBreakdown); } } @@ -254,7 +254,7 @@ void Hypothesis::Evaluate(const SquareMatrix &futureScore) const StatefulFeatureFunction &ff = *ffs[i]; const StaticData &staticData = StaticData::Instance(); if (! staticData.IsFeatureFunctionIgnored(ff)) { - m_ffStates[i] = ff.Evaluate(*this, + m_ffStates[i] = ff.EvaluateWhenApplied(*this, m_prevHypo ? m_prevHypo->m_ffStates[i] : NULL, &m_scoreBreakdown); } diff --git a/moses/LM/DALMWrapper.cpp b/moses/LM/DALMWrapper.cpp index 420efd9e8e..943b4f3af5 100644 --- a/moses/LM/DALMWrapper.cpp +++ b/moses/LM/DALMWrapper.cpp @@ -288,7 +288,7 @@ void LanguageModelDALM::CalcScore(const Phrase &phrase, float &fullScore, float ngramScore = TransformLMScore(ngramScore); } -FFState *LanguageModelDALM::Evaluate(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const{ +FFState *LanguageModelDALM::EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const{ // In this function, we only compute the LM scores of n-grams that overlap a // phrase boundary. Phrase-internal scores are taken directly from the // translation option. 
diff --git a/moses/LM/DALMWrapper.h b/moses/LM/DALMWrapper.h index c791eeea66..ae3618cf9d 100644 --- a/moses/LM/DALMWrapper.h +++ b/moses/LM/DALMWrapper.h @@ -34,7 +34,7 @@ class LanguageModelDALM : public LanguageModel virtual void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const; - virtual FFState *Evaluate(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const; + virtual FFState *EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const; virtual FFState *EvaluateChart(const ChartHypothesis& hypo, int featureID, ScoreComponentCollection *out) const; diff --git a/moses/LM/Implementation.cpp b/moses/LM/Implementation.cpp index ef09fbc77f..35082c2b15 100644 --- a/moses/LM/Implementation.cpp +++ b/moses/LM/Implementation.cpp @@ -134,7 +134,7 @@ void LanguageModelImplementation::CalcScore(const Phrase &phrase, float &fullSco } } -FFState *LanguageModelImplementation::Evaluate(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const +FFState *LanguageModelImplementation::EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const { // In this function, we only compute the LM scores of n-grams that overlap a // phrase boundary. 
Phrase-internal scores are taken directly from the diff --git a/moses/LM/Implementation.h b/moses/LM/Implementation.h index a39f5e42b6..6c8ee50a7b 100644 --- a/moses/LM/Implementation.h +++ b/moses/LM/Implementation.h @@ -89,7 +89,7 @@ class LanguageModelImplementation : public LanguageModel void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const; - FFState *Evaluate(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const; + FFState *EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const; FFState* EvaluateChart(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection* accumulator) const; diff --git a/moses/LM/Ken.cpp b/moses/LM/Ken.cpp index 2dfb58c239..2674dbe8c7 100644 --- a/moses/LM/Ken.cpp +++ b/moses/LM/Ken.cpp @@ -229,7 +229,7 @@ template void LanguageModelKen::CalcScore(const Phrase &phr fullScore = TransformLMScore(fullScore); } -template FFState *LanguageModelKen::Evaluate(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const +template FFState *LanguageModelKen::EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const { const lm::ngram::State &in_state = static_cast(*ps).state; diff --git a/moses/LM/Ken.h b/moses/LM/Ken.h index e5950f5913..931ba24129 100644 --- a/moses/LM/Ken.h +++ b/moses/LM/Ken.h @@ -55,7 +55,7 @@ template class LanguageModelKen : public LanguageModel virtual void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const; - virtual FFState *Evaluate(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const; + virtual FFState *EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const; virtual FFState *EvaluateChart(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection *accumulator) const; diff --git 
a/moses/ScoreComponentCollectionTest.cpp b/moses/ScoreComponentCollectionTest.cpp index 719e05e7d3..d1064c6aea 100644 --- a/moses/ScoreComponentCollectionTest.cpp +++ b/moses/ScoreComponentCollectionTest.cpp @@ -34,7 +34,7 @@ class MockStatelessFeatureFunction : public StatelessFeatureFunction public: MockStatelessFeatureFunction(size_t n, const string &line) : StatelessFeatureFunction(n, line) {} - void Evaluate(const Hypothesis&, ScoreComponentCollection*) const {} + void EvaluateWhenApplied(const Hypothesis&, ScoreComponentCollection*) const {} void EvaluateChart(const ChartHypothesis&, ScoreComponentCollection*) const {} void EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath From e197b110fcc9c1a708da1fd88ec7f79492e74ff4 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 9 Jul 2014 23:54:16 +0100 Subject: [PATCH 39/84] rename Evaluate() to EvaluateWhenApplied() --- moses/ChartHypothesis.cpp | 4 ++-- moses/FF/BleuScoreFeature.cpp | 2 +- moses/FF/BleuScoreFeature.h | 2 +- moses/FF/ConstrainedDecoding.cpp | 2 +- moses/FF/ConstrainedDecoding.h | 2 +- moses/FF/ControlRecombination.cpp | 2 +- moses/FF/ControlRecombination.h | 2 +- moses/FF/CountNonTerms.h | 2 +- moses/FF/CoveredReferenceFeature.cpp | 2 +- moses/FF/CoveredReferenceFeature.h | 2 +- moses/FF/DecodeFeature.h | 2 +- moses/FF/DistortionScoreProducer.h | 2 +- moses/FF/ExternalFeature.cpp | 2 +- moses/FF/ExternalFeature.h | 2 +- moses/FF/GlobalLexicalModel.h | 2 +- moses/FF/GlobalLexicalModelUnlimited.h | 2 +- moses/FF/HyperParameterAsWeight.h | 2 +- moses/FF/InputFeature.h | 2 +- moses/FF/InternalStructStatelessFF.h | 2 +- moses/FF/LexicalReordering/LexicalReordering.h | 2 +- moses/FF/MaxSpanFreeNonTermSource.h | 2 +- moses/FF/NieceTerminal.cpp | 2 +- moses/FF/NieceTerminal.h | 2 +- moses/FF/OSM-Feature/OpSequenceModel.cpp | 2 +- moses/FF/OSM-Feature/OpSequenceModel.h | 2 +- moses/FF/PhraseBoundaryFeature.h | 2 +- moses/FF/PhraseLengthFeature.h | 2 +- 
moses/FF/PhrasePairFeature.h | 2 +- moses/FF/PhrasePenalty.h | 2 +- moses/FF/ReferenceComparison.h | 2 +- moses/FF/RuleScope.h | 2 +- moses/FF/SetSourcePhrase.h | 2 +- moses/FF/SkeletonStatefulFF.cpp | 2 +- moses/FF/SkeletonStatefulFF.h | 2 +- moses/FF/SkeletonStatelessFF.cpp | 2 +- moses/FF/SkeletonStatelessFF.h | 2 +- moses/FF/SoftMatchingFeature.cpp | 2 +- moses/FF/SoftMatchingFeature.h | 2 +- moses/FF/SourceGHKMTreeInputMatchFeature.h | 2 +- moses/FF/SourceWordDeletionFeature.h | 2 +- moses/FF/SpanLength.h | 2 +- moses/FF/SparseHieroReorderingFeature.cpp | 2 +- moses/FF/SparseHieroReorderingFeature.h | 2 +- moses/FF/StatefulFeatureFunction.h | 2 +- moses/FF/StatelessFeatureFunction.h | 2 +- moses/FF/SyntaxRHS.cpp | 2 +- moses/FF/SyntaxRHS.h | 2 +- moses/FF/TargetBigramFeature.h | 2 +- moses/FF/TargetNgramFeature.cpp | 2 +- moses/FF/TargetNgramFeature.h | 2 +- moses/FF/TargetWordInsertionFeature.h | 2 +- moses/FF/TreeStructureFeature.cpp | 2 +- moses/FF/TreeStructureFeature.h | 2 +- moses/FF/UnknownWordPenaltyProducer.h | 2 +- moses/FF/WordPenaltyProducer.h | 2 +- moses/FF/WordTranslationFeature.cpp | 2 +- moses/FF/WordTranslationFeature.h | 2 +- moses/Incremental.cpp | 2 +- moses/LM/DALMWrapper.cpp | 2 +- moses/LM/DALMWrapper.h | 2 +- moses/LM/Implementation.cpp | 2 +- moses/LM/Implementation.h | 2 +- moses/LM/Ken.cpp | 4 ++-- moses/LM/Ken.h | 2 +- moses/LM/LDHT.cpp | 4 ++-- moses/ScoreComponentCollectionTest.cpp | 2 +- moses/SyntacticLanguageModel.h | 2 +- 67 files changed, 70 insertions(+), 70 deletions(-) diff --git a/moses/ChartHypothesis.cpp b/moses/ChartHypothesis.cpp index 212a28d237..2bcc480e77 100644 --- a/moses/ChartHypothesis.cpp +++ b/moses/ChartHypothesis.cpp @@ -200,7 +200,7 @@ void ChartHypothesis::Evaluate() StatelessFeatureFunction::GetStatelessFeatureFunctions(); for (unsigned i = 0; i < sfs.size(); ++i) { if (! 
staticData.IsFeatureFunctionIgnored( *sfs[i] )) { - sfs[i]->EvaluateChart(*this,&m_scoreBreakdown); + sfs[i]->EvaluateWhenApplied(*this,&m_scoreBreakdown); } } @@ -208,7 +208,7 @@ void ChartHypothesis::Evaluate() StatefulFeatureFunction::GetStatefulFeatureFunctions(); for (unsigned i = 0; i < ffs.size(); ++i) { if (! staticData.IsFeatureFunctionIgnored( *ffs[i] )) { - m_ffStates[i] = ffs[i]->EvaluateChart(*this,i,&m_scoreBreakdown); + m_ffStates[i] = ffs[i]->EvaluateWhenApplied(*this,i,&m_scoreBreakdown); } } diff --git a/moses/FF/BleuScoreFeature.cpp b/moses/FF/BleuScoreFeature.cpp index 0fb1e257d8..0d0a20797a 100644 --- a/moses/FF/BleuScoreFeature.cpp +++ b/moses/FF/BleuScoreFeature.cpp @@ -563,7 +563,7 @@ FFState* BleuScoreFeature::EvaluateWhenApplied(const Hypothesis& cur_hypo, return new_state; } -FFState* BleuScoreFeature::EvaluateChart(const ChartHypothesis& cur_hypo, int featureID, +FFState* BleuScoreFeature::EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection* accumulator ) const { if (!m_enabled) return new BleuScoreState(); diff --git a/moses/FF/BleuScoreFeature.h b/moses/FF/BleuScoreFeature.h index c383648f06..cdba578acb 100644 --- a/moses/FF/BleuScoreFeature.h +++ b/moses/FF/BleuScoreFeature.h @@ -118,7 +118,7 @@ class BleuScoreFeature : public StatefulFeatureFunction FFState* EvaluateWhenApplied( const Hypothesis& cur_hypo, const FFState* prev_state, ScoreComponentCollection* accumulator) const; - FFState* EvaluateChart(const ChartHypothesis& cur_hypo, + FFState* EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection* accumulator) const; void EvaluateWithSourceContext(const InputType &input diff --git a/moses/FF/ConstrainedDecoding.cpp b/moses/FF/ConstrainedDecoding.cpp index e0bc188ad6..bfe4129135 100644 --- a/moses/FF/ConstrainedDecoding.cpp +++ b/moses/FF/ConstrainedDecoding.cpp @@ -143,7 +143,7 @@ FFState* ConstrainedDecoding::EvaluateWhenApplied( return ret; } 
-FFState* ConstrainedDecoding::EvaluateChart( +FFState* ConstrainedDecoding::EvaluateWhenApplied( const ChartHypothesis &hypo, int /* featureID - used to index the state in the previous hypotheses */, ScoreComponentCollection* accumulator) const diff --git a/moses/FF/ConstrainedDecoding.h b/moses/FF/ConstrainedDecoding.h index c7eef65223..ca007f21d4 100644 --- a/moses/FF/ConstrainedDecoding.h +++ b/moses/FF/ConstrainedDecoding.h @@ -60,7 +60,7 @@ class ConstrainedDecoding : public StatefulFeatureFunction const FFState* prev_state, ScoreComponentCollection* accumulator) const; - FFState* EvaluateChart( + FFState* EvaluateWhenApplied( const ChartHypothesis& /* cur_hypo */, int /* featureID - used to index the state in the previous hypotheses */, ScoreComponentCollection* accumulator) const; diff --git a/moses/FF/ControlRecombination.cpp b/moses/FF/ControlRecombination.cpp index adc36145ea..85e88ac943 100644 --- a/moses/FF/ControlRecombination.cpp +++ b/moses/FF/ControlRecombination.cpp @@ -64,7 +64,7 @@ FFState* ControlRecombination::EvaluateWhenApplied( return new ControlRecombinationState(hypo, *this); } -FFState* ControlRecombination::EvaluateChart( +FFState* ControlRecombination::EvaluateWhenApplied( const ChartHypothesis &hypo, int /* featureID - used to index the state in the previous hypotheses */, ScoreComponentCollection* accumulator) const diff --git a/moses/FF/ControlRecombination.h b/moses/FF/ControlRecombination.h index f5b48027b8..095cc6b298 100644 --- a/moses/FF/ControlRecombination.h +++ b/moses/FF/ControlRecombination.h @@ -74,7 +74,7 @@ class ControlRecombination : public StatefulFeatureFunction const FFState* prev_state, ScoreComponentCollection* accumulator) const; - FFState* EvaluateChart( + FFState* EvaluateWhenApplied( const ChartHypothesis& /* cur_hypo */, int /* featureID - used to index the state in the previous hypotheses */, ScoreComponentCollection* accumulator) const; diff --git a/moses/FF/CountNonTerms.h b/moses/FF/CountNonTerms.h index 
0962da5569..c4e1467e9e 100644 --- a/moses/FF/CountNonTerms.h +++ b/moses/FF/CountNonTerms.h @@ -29,7 +29,7 @@ class CountNonTerms : public StatelessFeatureFunction ScoreComponentCollection* accumulator) const {} - void EvaluateChart( + void EvaluateWhenApplied( const ChartHypothesis& hypo, ScoreComponentCollection* accumulator) const {} diff --git a/moses/FF/CoveredReferenceFeature.cpp b/moses/FF/CoveredReferenceFeature.cpp index 1a43b29888..3a2482d0df 100644 --- a/moses/FF/CoveredReferenceFeature.cpp +++ b/moses/FF/CoveredReferenceFeature.cpp @@ -131,7 +131,7 @@ FFState* CoveredReferenceFeature::EvaluateWhenApplied( return ret; } -FFState* CoveredReferenceFeature::EvaluateChart( +FFState* CoveredReferenceFeature::EvaluateWhenApplied( const ChartHypothesis& /* cur_hypo */, int /* featureID - used to index the state in the previous hypotheses */, ScoreComponentCollection* accumulator) const diff --git a/moses/FF/CoveredReferenceFeature.h b/moses/FF/CoveredReferenceFeature.h index 4d4275f293..a6cdd6f998 100644 --- a/moses/FF/CoveredReferenceFeature.h +++ b/moses/FF/CoveredReferenceFeature.h @@ -66,7 +66,7 @@ class CoveredReferenceFeature : public StatefulFeatureFunction const Hypothesis& cur_hypo, const FFState* prev_state, ScoreComponentCollection* accumulator) const; - FFState* EvaluateChart( + FFState* EvaluateWhenApplied( const ChartHypothesis& /* cur_hypo */, int /* featureID - used to index the state in the previous hypotheses */, ScoreComponentCollection* accumulator) const; diff --git a/moses/FF/DecodeFeature.h b/moses/FF/DecodeFeature.h index c13af8d710..ac4e9392b6 100644 --- a/moses/FF/DecodeFeature.h +++ b/moses/FF/DecodeFeature.h @@ -65,7 +65,7 @@ class DecodeFeature : public StatelessFeatureFunction void EvaluateWhenApplied(const Hypothesis& hypo, ScoreComponentCollection* accumulator) const {} - void EvaluateChart(const ChartHypothesis &hypo, + void EvaluateWhenApplied(const ChartHypothesis &hypo, ScoreComponentCollection* accumulator) const {} void 
EvaluateWithSourceContext(const InputType &input diff --git a/moses/FF/DistortionScoreProducer.h b/moses/FF/DistortionScoreProducer.h index 5f90c6e591..aa2c18b95d 100644 --- a/moses/FF/DistortionScoreProducer.h +++ b/moses/FF/DistortionScoreProducer.h @@ -33,7 +33,7 @@ class DistortionScoreProducer : public StatefulFeatureFunction const FFState* prev_state, ScoreComponentCollection* accumulator) const; - virtual FFState* EvaluateChart( + virtual FFState* EvaluateWhenApplied( const ChartHypothesis& /* cur_hypo */, int /* featureID - used to index the state in the previous hypotheses */, ScoreComponentCollection*) const { diff --git a/moses/FF/ExternalFeature.cpp b/moses/FF/ExternalFeature.cpp index 8fe6125441..10800d24df 100644 --- a/moses/FF/ExternalFeature.cpp +++ b/moses/FF/ExternalFeature.cpp @@ -59,7 +59,7 @@ FFState* ExternalFeature::EvaluateWhenApplied( return new ExternalFeatureState(m_stateSize); } -FFState* ExternalFeature::EvaluateChart( +FFState* ExternalFeature::EvaluateWhenApplied( const ChartHypothesis& /* cur_hypo */, int /* featureID - used to index the state in the previous hypotheses */, ScoreComponentCollection* accumulator) const diff --git a/moses/FF/ExternalFeature.h b/moses/FF/ExternalFeature.h index f845842a5c..a8916a853f 100644 --- a/moses/FF/ExternalFeature.h +++ b/moses/FF/ExternalFeature.h @@ -68,7 +68,7 @@ class ExternalFeature : public StatefulFeatureFunction const FFState* prev_state, ScoreComponentCollection* accumulator) const; - FFState* EvaluateChart( + FFState* EvaluateWhenApplied( const ChartHypothesis& /* cur_hypo */, int /* featureID - used to index the state in the previous hypotheses */, ScoreComponentCollection* accumulator) const; diff --git a/moses/FF/GlobalLexicalModel.h b/moses/FF/GlobalLexicalModel.h index 1af2e79e1a..151dbf4724 100644 --- a/moses/FF/GlobalLexicalModel.h +++ b/moses/FF/GlobalLexicalModel.h @@ -74,7 +74,7 @@ class GlobalLexicalModel : public StatelessFeatureFunction ScoreComponentCollection* 
accumulator) const; - void EvaluateChart( + void EvaluateWhenApplied( const ChartHypothesis& hypo, ScoreComponentCollection* accumulator) const { throw std::logic_error("GlobalLexicalModel not supported in chart decoder, yet"); diff --git a/moses/FF/GlobalLexicalModelUnlimited.h b/moses/FF/GlobalLexicalModelUnlimited.h index 688cc607cb..096254613c 100644 --- a/moses/FF/GlobalLexicalModelUnlimited.h +++ b/moses/FF/GlobalLexicalModelUnlimited.h @@ -84,7 +84,7 @@ class GlobalLexicalModelUnlimited : public StatelessFeatureFunction void EvaluateWhenApplied(const Hypothesis& cur_hypo, ScoreComponentCollection* accumulator) const; - void EvaluateChart(const ChartHypothesis& /* cur_hypo */, + void EvaluateWhenApplied(const ChartHypothesis& /* cur_hypo */, int /* featureID */, ScoreComponentCollection* ) const { throw std::logic_error("GlobalLexicalModelUnlimited not supported in chart decoder, yet"); diff --git a/moses/FF/HyperParameterAsWeight.h b/moses/FF/HyperParameterAsWeight.h index 4ed181431f..aaad21c14a 100644 --- a/moses/FF/HyperParameterAsWeight.h +++ b/moses/FF/HyperParameterAsWeight.h @@ -38,7 +38,7 @@ class HyperParameterAsWeight : public StatelessFeatureFunction /** * Same for chart-based features. 
**/ - virtual void EvaluateChart(const ChartHypothesis &hypo, + virtual void EvaluateWhenApplied(const ChartHypothesis &hypo, ScoreComponentCollection* accumulator) const {} diff --git a/moses/FF/InputFeature.h b/moses/FF/InputFeature.h index 7193d90d37..ad4fe398a5 100644 --- a/moses/FF/InputFeature.h +++ b/moses/FF/InputFeature.h @@ -57,7 +57,7 @@ class InputFeature : public StatelessFeatureFunction void EvaluateWhenApplied(const Hypothesis& hypo, ScoreComponentCollection* accumulator) const {} - void EvaluateChart(const ChartHypothesis &hypo, + void EvaluateWhenApplied(const ChartHypothesis &hypo, ScoreComponentCollection* accumulator) const {} diff --git a/moses/FF/InternalStructStatelessFF.h b/moses/FF/InternalStructStatelessFF.h index 821dfb826a..2ed8801e20 100644 --- a/moses/FF/InternalStructStatelessFF.h +++ b/moses/FF/InternalStructStatelessFF.h @@ -30,7 +30,7 @@ class InternalStructStatelessFF : public StatelessFeatureFunction virtual void EvaluateWhenApplied(const Hypothesis& hypo, ScoreComponentCollection* accumulator) const {} - void EvaluateChart(const ChartHypothesis &hypo, + void EvaluateWhenApplied(const ChartHypothesis &hypo, ScoreComponentCollection* accumulator) const {} diff --git a/moses/FF/LexicalReordering/LexicalReordering.h b/moses/FF/LexicalReordering/LexicalReordering.h index b6610639ea..09d3b73cc6 100644 --- a/moses/FF/LexicalReordering/LexicalReordering.h +++ b/moses/FF/LexicalReordering/LexicalReordering.h @@ -49,7 +49,7 @@ class LexicalReordering : public StatefulFeatureFunction const FFState* prev_state, ScoreComponentCollection* accumulator) const; - virtual FFState* EvaluateChart(const ChartHypothesis&, + virtual FFState* EvaluateWhenApplied(const ChartHypothesis&, int /* featureID */, ScoreComponentCollection*) const { UTIL_THROW(util::Exception, "LexicalReordering is not valid for chart decoder"); diff --git a/moses/FF/MaxSpanFreeNonTermSource.h b/moses/FF/MaxSpanFreeNonTermSource.h index df5b98417b..973b374d8e 100644 --- 
a/moses/FF/MaxSpanFreeNonTermSource.h +++ b/moses/FF/MaxSpanFreeNonTermSource.h @@ -31,7 +31,7 @@ class MaxSpanFreeNonTermSource : public StatelessFeatureFunction ScoreComponentCollection* accumulator) const {} - virtual void EvaluateChart(const ChartHypothesis &hypo, + virtual void EvaluateWhenApplied(const ChartHypothesis &hypo, ScoreComponentCollection* accumulator) const {} diff --git a/moses/FF/NieceTerminal.cpp b/moses/FF/NieceTerminal.cpp index 921a95cbe3..b3a5f8f922 100644 --- a/moses/FF/NieceTerminal.cpp +++ b/moses/FF/NieceTerminal.cpp @@ -75,7 +75,7 @@ void NieceTerminal::EvaluateWhenApplied(const Hypothesis& hypo, ScoreComponentCollection* accumulator) const {} -void NieceTerminal::EvaluateChart(const ChartHypothesis &hypo, +void NieceTerminal::EvaluateWhenApplied(const ChartHypothesis &hypo, ScoreComponentCollection* accumulator) const {} diff --git a/moses/FF/NieceTerminal.h b/moses/FF/NieceTerminal.h index 93e55d6347..7daf2963e6 100644 --- a/moses/FF/NieceTerminal.h +++ b/moses/FF/NieceTerminal.h @@ -31,7 +31,7 @@ class NieceTerminal : public StatelessFeatureFunction , ScoreComponentCollection *estimatedFutureScore = NULL) const; void EvaluateWhenApplied(const Hypothesis& hypo, ScoreComponentCollection* accumulator) const; - void EvaluateChart(const ChartHypothesis &hypo, + void EvaluateWhenApplied(const ChartHypothesis &hypo, ScoreComponentCollection* accumulator) const; void SetParameter(const std::string& key, const std::string& value); diff --git a/moses/FF/OSM-Feature/OpSequenceModel.cpp b/moses/FF/OSM-Feature/OpSequenceModel.cpp index bc245d988b..7939421513 100644 --- a/moses/FF/OSM-Feature/OpSequenceModel.cpp +++ b/moses/FF/OSM-Feature/OpSequenceModel.cpp @@ -194,7 +194,7 @@ FFState* OpSequenceModel::EvaluateWhenApplied( // return NULL; } -FFState* OpSequenceModel::EvaluateChart( +FFState* OpSequenceModel::EvaluateWhenApplied( const ChartHypothesis& /* cur_hypo */, int /* featureID - used to index the state in the previous hypotheses */, 
ScoreComponentCollection* accumulator) const diff --git a/moses/FF/OSM-Feature/OpSequenceModel.h b/moses/FF/OSM-Feature/OpSequenceModel.h index 7dbe2e0ca0..c4d26f98ef 100644 --- a/moses/FF/OSM-Feature/OpSequenceModel.h +++ b/moses/FF/OSM-Feature/OpSequenceModel.h @@ -34,7 +34,7 @@ class OpSequenceModel : public StatefulFeatureFunction const FFState* prev_state, ScoreComponentCollection* accumulator) const; - virtual FFState* EvaluateChart( + virtual FFState* EvaluateWhenApplied( const ChartHypothesis& /* cur_hypo */, int /* featureID - used to index the state in the previous hypotheses */, ScoreComponentCollection* accumulator) const; diff --git a/moses/FF/PhraseBoundaryFeature.h b/moses/FF/PhraseBoundaryFeature.h index 9aec700dcb..e4c3ca3bab 100644 --- a/moses/FF/PhraseBoundaryFeature.h +++ b/moses/FF/PhraseBoundaryFeature.h @@ -47,7 +47,7 @@ class PhraseBoundaryFeature : public StatefulFeatureFunction virtual FFState* EvaluateWhenApplied(const Hypothesis& cur_hypo, const FFState* prev_state, ScoreComponentCollection* accumulator) const; - virtual FFState* EvaluateChart( const ChartHypothesis& /* cur_hypo */, + virtual FFState* EvaluateWhenApplied( const ChartHypothesis& /* cur_hypo */, int /* featureID */, ScoreComponentCollection* ) const { throw std::logic_error("PhraseBoundaryState not supported in chart decoder, yet"); diff --git a/moses/FF/PhraseLengthFeature.h b/moses/FF/PhraseLengthFeature.h index cf26d9a20b..4976e22100 100644 --- a/moses/FF/PhraseLengthFeature.h +++ b/moses/FF/PhraseLengthFeature.h @@ -28,7 +28,7 @@ class PhraseLengthFeature : public StatelessFeatureFunction ScoreComponentCollection* accumulator) const {} - void EvaluateChart(const ChartHypothesis& hypo, + void EvaluateWhenApplied(const ChartHypothesis& hypo, ScoreComponentCollection*) const { throw std::logic_error("PhraseLengthFeature not valid in chart decoder"); } diff --git a/moses/FF/PhrasePairFeature.h b/moses/FF/PhrasePairFeature.h index 94bf35af3e..8bfac628d1 100644 --- 
a/moses/FF/PhrasePairFeature.h +++ b/moses/FF/PhrasePairFeature.h @@ -40,7 +40,7 @@ class PhrasePairFeature: public StatelessFeatureFunction void EvaluateWhenApplied(const Hypothesis& hypo, ScoreComponentCollection* accumulator) const; - void EvaluateChart(const ChartHypothesis& hypo, + void EvaluateWhenApplied(const ChartHypothesis& hypo, ScoreComponentCollection*) const { throw std::logic_error("PhrasePairFeature not valid in chart decoder"); } diff --git a/moses/FF/PhrasePenalty.h b/moses/FF/PhrasePenalty.h index 2babc7d676..f822e583b8 100644 --- a/moses/FF/PhrasePenalty.h +++ b/moses/FF/PhrasePenalty.h @@ -22,7 +22,7 @@ class PhrasePenalty : public StatelessFeatureFunction void EvaluateWhenApplied(const Hypothesis& hypo, ScoreComponentCollection* accumulator) const {} - void EvaluateChart(const ChartHypothesis &hypo, + void EvaluateWhenApplied(const ChartHypothesis &hypo, ScoreComponentCollection* accumulator) const {} diff --git a/moses/FF/ReferenceComparison.h b/moses/FF/ReferenceComparison.h index 571242ce43..62cf15ced5 100644 --- a/moses/FF/ReferenceComparison.h +++ b/moses/FF/ReferenceComparison.h @@ -33,7 +33,7 @@ class ReferenceComparison : public StatelessFeatureFunction ScoreComponentCollection* accumulator) const {} - virtual void EvaluateChart(const ChartHypothesis &hypo, + virtual void EvaluateWhenApplied(const ChartHypothesis &hypo, ScoreComponentCollection* accumulator) const {} diff --git a/moses/FF/RuleScope.h b/moses/FF/RuleScope.h index a051e411ad..a2c9e06f36 100644 --- a/moses/FF/RuleScope.h +++ b/moses/FF/RuleScope.h @@ -31,7 +31,7 @@ class RuleScope : public StatelessFeatureFunction ScoreComponentCollection* accumulator) const {} - virtual void EvaluateChart(const ChartHypothesis &hypo, + virtual void EvaluateWhenApplied(const ChartHypothesis &hypo, ScoreComponentCollection* accumulator) const {} diff --git a/moses/FF/SetSourcePhrase.h b/moses/FF/SetSourcePhrase.h index 3f5bc82868..81f293dde5 100644 --- a/moses/FF/SetSourcePhrase.h +++ 
b/moses/FF/SetSourcePhrase.h @@ -31,7 +31,7 @@ class SetSourcePhrase : public StatelessFeatureFunction ScoreComponentCollection* accumulator) const {} - virtual void EvaluateChart(const ChartHypothesis &hypo, + virtual void EvaluateWhenApplied(const ChartHypothesis &hypo, ScoreComponentCollection* accumulator) const {} diff --git a/moses/FF/SkeletonStatefulFF.cpp b/moses/FF/SkeletonStatefulFF.cpp index 0d1a0f9118..fe81aeeae1 100644 --- a/moses/FF/SkeletonStatefulFF.cpp +++ b/moses/FF/SkeletonStatefulFF.cpp @@ -56,7 +56,7 @@ FFState* SkeletonStatefulFF::EvaluateWhenApplied( return new SkeletonState(0); } -FFState* SkeletonStatefulFF::EvaluateChart( +FFState* SkeletonStatefulFF::EvaluateWhenApplied( const ChartHypothesis& /* cur_hypo */, int /* featureID - used to index the state in the previous hypotheses */, ScoreComponentCollection* accumulator) const diff --git a/moses/FF/SkeletonStatefulFF.h b/moses/FF/SkeletonStatefulFF.h index fd93bce55a..6fa26803eb 100644 --- a/moses/FF/SkeletonStatefulFF.h +++ b/moses/FF/SkeletonStatefulFF.h @@ -44,7 +44,7 @@ class SkeletonStatefulFF : public StatefulFeatureFunction const Hypothesis& cur_hypo, const FFState* prev_state, ScoreComponentCollection* accumulator) const; - FFState* EvaluateChart( + FFState* EvaluateWhenApplied( const ChartHypothesis& /* cur_hypo */, int /* featureID - used to index the state in the previous hypotheses */, ScoreComponentCollection* accumulator) const; diff --git a/moses/FF/SkeletonStatelessFF.cpp b/moses/FF/SkeletonStatelessFF.cpp index 446d57b932..80c7d130ed 100644 --- a/moses/FF/SkeletonStatelessFF.cpp +++ b/moses/FF/SkeletonStatelessFF.cpp @@ -48,7 +48,7 @@ void SkeletonStatelessFF::EvaluateWhenApplied(const Hypothesis& hypo, ScoreComponentCollection* accumulator) const {} -void SkeletonStatelessFF::EvaluateChart(const ChartHypothesis &hypo, +void SkeletonStatelessFF::EvaluateWhenApplied(const ChartHypothesis &hypo, ScoreComponentCollection* accumulator) const {} diff --git 
a/moses/FF/SkeletonStatelessFF.h b/moses/FF/SkeletonStatelessFF.h index 5d772b398c..520ec14058 100644 --- a/moses/FF/SkeletonStatelessFF.h +++ b/moses/FF/SkeletonStatelessFF.h @@ -27,7 +27,7 @@ class SkeletonStatelessFF : public StatelessFeatureFunction , ScoreComponentCollection *estimatedFutureScore = NULL) const; void EvaluateWhenApplied(const Hypothesis& hypo, ScoreComponentCollection* accumulator) const; - void EvaluateChart(const ChartHypothesis &hypo, + void EvaluateWhenApplied(const ChartHypothesis &hypo, ScoreComponentCollection* accumulator) const; void SetParameter(const std::string& key, const std::string& value); diff --git a/moses/FF/SoftMatchingFeature.cpp b/moses/FF/SoftMatchingFeature.cpp index 017e551c41..0475547daa 100644 --- a/moses/FF/SoftMatchingFeature.cpp +++ b/moses/FF/SoftMatchingFeature.cpp @@ -61,7 +61,7 @@ bool SoftMatchingFeature::Load(const std::string& filePath) return true; } -void SoftMatchingFeature::EvaluateChart(const ChartHypothesis& hypo, +void SoftMatchingFeature::EvaluateWhenApplied(const ChartHypothesis& hypo, ScoreComponentCollection* accumulator) const { diff --git a/moses/FF/SoftMatchingFeature.h b/moses/FF/SoftMatchingFeature.h index 37568325f1..ff923ea082 100644 --- a/moses/FF/SoftMatchingFeature.h +++ b/moses/FF/SoftMatchingFeature.h @@ -19,7 +19,7 @@ class SoftMatchingFeature : public StatelessFeatureFunction return true; } - virtual void EvaluateChart(const ChartHypothesis& hypo, + virtual void EvaluateWhenApplied(const ChartHypothesis& hypo, ScoreComponentCollection* accumulator) const; void EvaluateInIsolation(const Phrase &source diff --git a/moses/FF/SourceGHKMTreeInputMatchFeature.h b/moses/FF/SourceGHKMTreeInputMatchFeature.h index ef9155f489..743871b1c3 100644 --- a/moses/FF/SourceGHKMTreeInputMatchFeature.h +++ b/moses/FF/SourceGHKMTreeInputMatchFeature.h @@ -32,7 +32,7 @@ class SourceGHKMTreeInputMatchFeature : public StatelessFeatureFunction void EvaluateWhenApplied(const Hypothesis& hypo, 
ScoreComponentCollection* accumulator) const {}; - void EvaluateChart(const ChartHypothesis &hypo, + void EvaluateWhenApplied(const ChartHypothesis &hypo, ScoreComponentCollection* accumulator) const {}; }; diff --git a/moses/FF/SourceWordDeletionFeature.h b/moses/FF/SourceWordDeletionFeature.h index a2fec0f85c..8211ef0ca1 100644 --- a/moses/FF/SourceWordDeletionFeature.h +++ b/moses/FF/SourceWordDeletionFeature.h @@ -42,7 +42,7 @@ class SourceWordDeletionFeature : public StatelessFeatureFunction void EvaluateWhenApplied(const Hypothesis& hypo, ScoreComponentCollection* accumulator) const {} - void EvaluateChart(const ChartHypothesis &hypo, + void EvaluateWhenApplied(const ChartHypothesis &hypo, ScoreComponentCollection* accumulator) const {} diff --git a/moses/FF/SpanLength.h b/moses/FF/SpanLength.h index 7792cc6d09..dc5564fcdf 100644 --- a/moses/FF/SpanLength.h +++ b/moses/FF/SpanLength.h @@ -31,7 +31,7 @@ class SpanLength : public StatelessFeatureFunction ScoreComponentCollection* accumulator) const {} - virtual void EvaluateChart(const ChartHypothesis &hypo, + virtual void EvaluateWhenApplied(const ChartHypothesis &hypo, ScoreComponentCollection* accumulator) const {} diff --git a/moses/FF/SparseHieroReorderingFeature.cpp b/moses/FF/SparseHieroReorderingFeature.cpp index f42f5de2fc..0c6ac47672 100644 --- a/moses/FF/SparseHieroReorderingFeature.cpp +++ b/moses/FF/SparseHieroReorderingFeature.cpp @@ -81,7 +81,7 @@ const Factor* SparseHieroReorderingFeature::GetFactor(const Word& word, const Vo return factor; } -void SparseHieroReorderingFeature::EvaluateChart( +void SparseHieroReorderingFeature::EvaluateWhenApplied( const ChartHypothesis& cur_hypo , ScoreComponentCollection* accumulator) const { diff --git a/moses/FF/SparseHieroReorderingFeature.h b/moses/FF/SparseHieroReorderingFeature.h index 45ff1884a2..d631fdec10 100644 --- a/moses/FF/SparseHieroReorderingFeature.h +++ b/moses/FF/SparseHieroReorderingFeature.h @@ -47,7 +47,7 @@ class 
SparseHieroReorderingFeature : public StatelessFeatureFunction virtual void EvaluateWhenApplied(const Hypothesis& hypo, ScoreComponentCollection* accumulator) const {} - void EvaluateChart(const ChartHypothesis &hypo, + void EvaluateWhenApplied(const ChartHypothesis &hypo, ScoreComponentCollection* accumulator) const; diff --git a/moses/FF/StatefulFeatureFunction.h b/moses/FF/StatefulFeatureFunction.h index bf47ec5cf0..86bed04eea 100644 --- a/moses/FF/StatefulFeatureFunction.h +++ b/moses/FF/StatefulFeatureFunction.h @@ -34,7 +34,7 @@ class StatefulFeatureFunction: public FeatureFunction const FFState* prev_state, ScoreComponentCollection* accumulator) const = 0; - virtual FFState* EvaluateChart( + virtual FFState* EvaluateWhenApplied( const ChartHypothesis& /* cur_hypo */, int /* featureID - used to index the state in the previous hypotheses */, ScoreComponentCollection* accumulator) const = 0; diff --git a/moses/FF/StatelessFeatureFunction.h b/moses/FF/StatelessFeatureFunction.h index e300ac8d32..94029f8827 100644 --- a/moses/FF/StatelessFeatureFunction.h +++ b/moses/FF/StatelessFeatureFunction.h @@ -29,7 +29,7 @@ class StatelessFeatureFunction: public FeatureFunction /** * Same for chart-based features. 
**/ - virtual void EvaluateChart(const ChartHypothesis &hypo, + virtual void EvaluateWhenApplied(const ChartHypothesis &hypo, ScoreComponentCollection* accumulator) const = 0; virtual bool IsStateless() const { diff --git a/moses/FF/SyntaxRHS.cpp b/moses/FF/SyntaxRHS.cpp index de886cd0cb..5168b72d7c 100644 --- a/moses/FF/SyntaxRHS.cpp +++ b/moses/FF/SyntaxRHS.cpp @@ -46,7 +46,7 @@ void SyntaxRHS::EvaluateWhenApplied(const Hypothesis& hypo, ScoreComponentCollection* accumulator) const {} -void SyntaxRHS::EvaluateChart(const ChartHypothesis &hypo, +void SyntaxRHS::EvaluateWhenApplied(const ChartHypothesis &hypo, ScoreComponentCollection* accumulator) const {} diff --git a/moses/FF/SyntaxRHS.h b/moses/FF/SyntaxRHS.h index ed455220bd..4b92149952 100644 --- a/moses/FF/SyntaxRHS.h +++ b/moses/FF/SyntaxRHS.h @@ -27,7 +27,7 @@ class SyntaxRHS : public StatelessFeatureFunction , ScoreComponentCollection *estimatedFutureScore = NULL) const; void EvaluateWhenApplied(const Hypothesis& hypo, ScoreComponentCollection* accumulator) const; - void EvaluateChart(const ChartHypothesis &hypo, + void EvaluateWhenApplied(const ChartHypothesis &hypo, ScoreComponentCollection* accumulator) const; }; diff --git a/moses/FF/TargetBigramFeature.h b/moses/FF/TargetBigramFeature.h index 6b26bb2695..c63f3caa41 100644 --- a/moses/FF/TargetBigramFeature.h +++ b/moses/FF/TargetBigramFeature.h @@ -42,7 +42,7 @@ class TargetBigramFeature : public StatefulFeatureFunction virtual FFState* EvaluateWhenApplied(const Hypothesis& cur_hypo, const FFState* prev_state, ScoreComponentCollection* accumulator) const; - virtual FFState* EvaluateChart( const ChartHypothesis& /* cur_hypo */, + virtual FFState* EvaluateWhenApplied( const ChartHypothesis& /* cur_hypo */, int /* featureID */, ScoreComponentCollection* ) const { throw std::logic_error("TargetBigramFeature not valid in chart decoder"); diff --git a/moses/FF/TargetNgramFeature.cpp b/moses/FF/TargetNgramFeature.cpp index a2fc4e0400..a434109902 100644 --- 
a/moses/FF/TargetNgramFeature.cpp +++ b/moses/FF/TargetNgramFeature.cpp @@ -207,7 +207,7 @@ void TargetNgramFeature::appendNgram(const Word& word, bool& skip, stringstream } } -FFState* TargetNgramFeature::EvaluateChart(const ChartHypothesis& cur_hypo, int featureId, ScoreComponentCollection* accumulator) const +FFState* TargetNgramFeature::EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureId, ScoreComponentCollection* accumulator) const { vector contextFactor; contextFactor.reserve(m_n); diff --git a/moses/FF/TargetNgramFeature.h b/moses/FF/TargetNgramFeature.h index 914538dd4b..e87252670a 100644 --- a/moses/FF/TargetNgramFeature.h +++ b/moses/FF/TargetNgramFeature.h @@ -189,7 +189,7 @@ class TargetNgramFeature : public StatefulFeatureFunction virtual FFState* EvaluateWhenApplied(const Hypothesis& cur_hypo, const FFState* prev_state, ScoreComponentCollection* accumulator) const; - virtual FFState* EvaluateChart(const ChartHypothesis& cur_hypo, int featureId, + virtual FFState* EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureId, ScoreComponentCollection* accumulator) const; void EvaluateWithSourceContext(const InputType &input diff --git a/moses/FF/TargetWordInsertionFeature.h b/moses/FF/TargetWordInsertionFeature.h index 6d48e7a982..06fa25400a 100644 --- a/moses/FF/TargetWordInsertionFeature.h +++ b/moses/FF/TargetWordInsertionFeature.h @@ -42,7 +42,7 @@ class TargetWordInsertionFeature : public StatelessFeatureFunction void EvaluateWhenApplied(const Hypothesis& hypo, ScoreComponentCollection* accumulator) const {} - void EvaluateChart(const ChartHypothesis &hypo, + void EvaluateWhenApplied(const ChartHypothesis &hypo, ScoreComponentCollection* accumulator) const {} diff --git a/moses/FF/TreeStructureFeature.cpp b/moses/FF/TreeStructureFeature.cpp index a5446891ae..c0505edd6e 100644 --- a/moses/FF/TreeStructureFeature.cpp +++ b/moses/FF/TreeStructureFeature.cpp @@ -266,7 +266,7 @@ void TreeStructureFeature::AddNTLabels(TreePointer 
root) const { } } -FFState* TreeStructureFeature::EvaluateChart(const ChartHypothesis& cur_hypo +FFState* TreeStructureFeature::EvaluateWhenApplied(const ChartHypothesis& cur_hypo , int featureID /* used to index the state in the previous hypotheses */ , ScoreComponentCollection* accumulator) const { diff --git a/moses/FF/TreeStructureFeature.h b/moses/FF/TreeStructureFeature.h index 100e378c65..a81d604bb1 100644 --- a/moses/FF/TreeStructureFeature.h +++ b/moses/FF/TreeStructureFeature.h @@ -166,7 +166,7 @@ class TreeStructureFeature : public StatefulFeatureFunction const Hypothesis& cur_hypo, const FFState* prev_state, ScoreComponentCollection* accumulator) const {UTIL_THROW(util::Exception, "Not implemented");}; - FFState* EvaluateChart( + FFState* EvaluateWhenApplied( const ChartHypothesis& /* cur_hypo */, int /* featureID - used to index the state in the previous hypotheses */, ScoreComponentCollection* accumulator) const; diff --git a/moses/FF/UnknownWordPenaltyProducer.h b/moses/FF/UnknownWordPenaltyProducer.h index d1ac80a026..8850641e51 100644 --- a/moses/FF/UnknownWordPenaltyProducer.h +++ b/moses/FF/UnknownWordPenaltyProducer.h @@ -34,7 +34,7 @@ class UnknownWordPenaltyProducer : public StatelessFeatureFunction void EvaluateWhenApplied(const Hypothesis& hypo, ScoreComponentCollection* accumulator) const {} - void EvaluateChart(const ChartHypothesis &hypo, + void EvaluateWhenApplied(const ChartHypothesis &hypo, ScoreComponentCollection* accumulator) const {} void EvaluateWithSourceContext(const InputType &input diff --git a/moses/FF/WordPenaltyProducer.h b/moses/FF/WordPenaltyProducer.h index 5dc07c679c..e628773075 100644 --- a/moses/FF/WordPenaltyProducer.h +++ b/moses/FF/WordPenaltyProducer.h @@ -34,7 +34,7 @@ class WordPenaltyProducer : public StatelessFeatureFunction void EvaluateWhenApplied(const Hypothesis& hypo, ScoreComponentCollection* accumulator) const {} - void EvaluateChart(const ChartHypothesis &hypo, + void EvaluateWhenApplied(const 
ChartHypothesis &hypo, ScoreComponentCollection* accumulator) const {} void EvaluateWithSourceContext(const InputType &input diff --git a/moses/FF/WordTranslationFeature.cpp b/moses/FF/WordTranslationFeature.cpp index ed88c0e7b2..7a98ad4c8a 100644 --- a/moses/FF/WordTranslationFeature.cpp +++ b/moses/FF/WordTranslationFeature.cpp @@ -349,7 +349,7 @@ void WordTranslationFeature::EvaluateWhenApplied } } -void WordTranslationFeature::EvaluateChart( +void WordTranslationFeature::EvaluateWhenApplied( const ChartHypothesis &hypo, ScoreComponentCollection* accumulator) const { diff --git a/moses/FF/WordTranslationFeature.h b/moses/FF/WordTranslationFeature.h index 9de73eaef8..c213d8eb3e 100644 --- a/moses/FF/WordTranslationFeature.h +++ b/moses/FF/WordTranslationFeature.h @@ -51,7 +51,7 @@ class WordTranslationFeature : public StatelessFeatureFunction void EvaluateWhenApplied(const Hypothesis& hypo, ScoreComponentCollection* accumulator) const; - void EvaluateChart(const ChartHypothesis &hypo, + void EvaluateWhenApplied(const ChartHypothesis &hypo, ScoreComponentCollection* accumulator) const; void EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath diff --git a/moses/Incremental.cpp b/moses/Incremental.cpp index 4e593df7ef..c8a48d425a 100644 --- a/moses/Incremental.cpp +++ b/moses/Incremental.cpp @@ -327,7 +327,7 @@ void PhraseAndFeatures(const search::Applied final, Phrase &phrase, ScoreCompone const LanguageModel &model = LanguageModel::GetFirstLM(); model.CalcScore(phrase, full, ignored_ngram, ignored_oov); - // CalcScore transforms, but EvaluateChart doesn't. + // CalcScore transforms, but EvaluateWhenApplied doesn't. 
features.Assign(&model, full); } diff --git a/moses/LM/DALMWrapper.cpp b/moses/LM/DALMWrapper.cpp index 943b4f3af5..68b3050de5 100644 --- a/moses/LM/DALMWrapper.cpp +++ b/moses/LM/DALMWrapper.cpp @@ -339,7 +339,7 @@ FFState *LanguageModelDALM::EvaluateWhenApplied(const Hypothesis &hypo, const FF return dalm_state; } -FFState *LanguageModelDALM::EvaluateChart(const ChartHypothesis& hypo, int featureID, ScoreComponentCollection *out) const{ +FFState *LanguageModelDALM::EvaluateWhenApplied(const ChartHypothesis& hypo, int featureID, ScoreComponentCollection *out) const{ // initialize language model context state DALMChartState *newState = new DALMChartState(); DALM::State &state = newState->GetRightContext(); diff --git a/moses/LM/DALMWrapper.h b/moses/LM/DALMWrapper.h index ae3618cf9d..ad53819c0a 100644 --- a/moses/LM/DALMWrapper.h +++ b/moses/LM/DALMWrapper.h @@ -36,7 +36,7 @@ class LanguageModelDALM : public LanguageModel virtual FFState *EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const; - virtual FFState *EvaluateChart(const ChartHypothesis& hypo, int featureID, ScoreComponentCollection *out) const; + virtual FFState *EvaluateWhenApplied(const ChartHypothesis& hypo, int featureID, ScoreComponentCollection *out) const; virtual bool IsUseable(const FactorMask &mask) const; diff --git a/moses/LM/Implementation.cpp b/moses/LM/Implementation.cpp index 35082c2b15..bd5bd18345 100644 --- a/moses/LM/Implementation.cpp +++ b/moses/LM/Implementation.cpp @@ -222,7 +222,7 @@ FFState *LanguageModelImplementation::EvaluateWhenApplied(const Hypothesis &hypo return res; } -FFState* LanguageModelImplementation::EvaluateChart(const ChartHypothesis& hypo, int featureID, ScoreComponentCollection* out) const +FFState* LanguageModelImplementation::EvaluateWhenApplied(const ChartHypothesis& hypo, int featureID, ScoreComponentCollection* out) const { LanguageModelChartState *ret = new LanguageModelChartState(hypo, featureID, 
GetNGramOrder()); // data structure for factored context phrase (history and predicted word) diff --git a/moses/LM/Implementation.h b/moses/LM/Implementation.h index 6c8ee50a7b..5eb8fb2096 100644 --- a/moses/LM/Implementation.h +++ b/moses/LM/Implementation.h @@ -91,7 +91,7 @@ class LanguageModelImplementation : public LanguageModel FFState *EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const; - FFState* EvaluateChart(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection* accumulator) const; + FFState* EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection* accumulator) const; void updateChartScore(float *prefixScore, float *finalScore, float score, size_t wordPos) const; diff --git a/moses/LM/Ken.cpp b/moses/LM/Ken.cpp index 2674dbe8c7..e69746084a 100644 --- a/moses/LM/Ken.cpp +++ b/moses/LM/Ken.cpp @@ -79,7 +79,7 @@ struct KenLMState : public FFState { // // FFState *Evaluate(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const; // -// FFState *EvaluateChart(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection *accumulator) const; +// FFState *EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection *accumulator) const; // // void IncrementalCallback(Incremental::Manager &manager) const { // manager.LMCallback(*m_ngram, m_lmIdLookup); @@ -307,7 +307,7 @@ class LanguageModelChartStateKenLM : public FFState lm::ngram::ChartState m_state; }; -template FFState *LanguageModelKen::EvaluateChart(const ChartHypothesis& hypo, int featureID, ScoreComponentCollection *accumulator) const +template FFState *LanguageModelKen::EvaluateWhenApplied(const ChartHypothesis& hypo, int featureID, ScoreComponentCollection *accumulator) const { LanguageModelChartStateKenLM *newState = new LanguageModelChartStateKenLM(); lm::ngram::RuleScore ruleScore(*m_ngram, newState->GetChartState()); diff --git 
a/moses/LM/Ken.h b/moses/LM/Ken.h index 931ba24129..2f473b697b 100644 --- a/moses/LM/Ken.h +++ b/moses/LM/Ken.h @@ -57,7 +57,7 @@ template class LanguageModelKen : public LanguageModel virtual FFState *EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const; - virtual FFState *EvaluateChart(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection *accumulator) const; + virtual FFState *EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection *accumulator) const; virtual void IncrementalCallback(Incremental::Manager &manager) const; virtual void ReportHistoryOrder(std::ostream &out,const Phrase &phrase) const; diff --git a/moses/LM/LDHT.cpp b/moses/LM/LDHT.cpp index 61226208ca..1d0331df5d 100644 --- a/moses/LM/LDHT.cpp +++ b/moses/LM/LDHT.cpp @@ -97,7 +97,7 @@ class LanguageModelLDHT : public LanguageModel FFState* Evaluate(const Hypothesis& hypo, const FFState* input_state, ScoreComponentCollection* score_output) const; - FFState* EvaluateChart(const ChartHypothesis& hypo, + FFState* EvaluateWhenApplied(const ChartHypothesis& hypo, int featureID, ScoreComponentCollection* accumulator) const; @@ -392,7 +392,7 @@ FFState* LanguageModelLDHT::Evaluate( return state; } -FFState* LanguageModelLDHT::EvaluateChart( +FFState* LanguageModelLDHT::EvaluateWhenApplied( const ChartHypothesis& hypo, int featureID, ScoreComponentCollection* accumulator) const diff --git a/moses/ScoreComponentCollectionTest.cpp b/moses/ScoreComponentCollectionTest.cpp index d1064c6aea..a238d66b87 100644 --- a/moses/ScoreComponentCollectionTest.cpp +++ b/moses/ScoreComponentCollectionTest.cpp @@ -35,7 +35,7 @@ class MockStatelessFeatureFunction : public StatelessFeatureFunction MockStatelessFeatureFunction(size_t n, const string &line) : StatelessFeatureFunction(n, line) {} void EvaluateWhenApplied(const Hypothesis&, ScoreComponentCollection*) const {} - void EvaluateChart(const ChartHypothesis&, 
ScoreComponentCollection*) const {} + void EvaluateWhenApplied(const ChartHypothesis&, ScoreComponentCollection*) const {} void EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase diff --git a/moses/SyntacticLanguageModel.h b/moses/SyntacticLanguageModel.h index 6e88d85c1e..76882a4d15 100644 --- a/moses/SyntacticLanguageModel.h +++ b/moses/SyntacticLanguageModel.h @@ -30,7 +30,7 @@ class SyntacticLanguageModel : public StatefulFeatureFunction const FFState* prev_state, ScoreComponentCollection* accumulator) const; - FFState* EvaluateChart(const ChartHypothesis& cur_hypo, + FFState* EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection* accumulator) const { throw std::runtime_error("Syntactic LM can only be used with phrase-based decoder."); From 98d464727b3312bb23ce57d0eb83d6d1f3e5ddb6 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Thu, 10 Jul 2014 11:40:32 +0100 Subject: [PATCH 40/84] add LBLLM from Blunsom's group --- contrib/other-builds/moses/.project | 10 +++ moses/FF/Factory.cpp | 2 + moses/FF/LBLLM.cpp | 131 ++++++++++++++++++++++++++++ moses/FF/LBLLM.h | 65 ++++++++++++++ 4 files changed, 208 insertions(+) create mode 100644 moses/FF/LBLLM.cpp create mode 100644 moses/FF/LBLLM.h diff --git a/contrib/other-builds/moses/.project b/contrib/other-builds/moses/.project index 1c22fca311..120323a085 100644 --- a/contrib/other-builds/moses/.project +++ b/contrib/other-builds/moses/.project @@ -1156,6 +1156,16 @@ 1 PARENT-3-PROJECT_LOC/moses/FF/InternalStructStatelessFF.h + + FF/LBLLM.cpp + 1 + PARENT-3-PROJECT_LOC/moses/FF/LBLLM.cpp + + + FF/LBLLM.h + 1 + PARENT-3-PROJECT_LOC/moses/FF/LBLLM.h + FF/LexicalReordering 2 diff --git a/moses/FF/Factory.cpp b/moses/FF/Factory.cpp index c9a7ef8fc7..38fbccc890 100644 --- a/moses/FF/Factory.cpp +++ b/moses/FF/Factory.cpp @@ -48,6 +48,7 @@ #include "NieceTerminal.h" #include "SpanLength.h" #include "SyntaxRHS.h" +#include 
"LBLLM.h" #include "moses/FF/SkeletonStatelessFF.h" #include "moses/FF/SkeletonStatefulFF.h" @@ -204,6 +205,7 @@ FeatureRegistry::FeatureRegistry() MOSES_FNAME(SparseHieroReorderingFeature); MOSES_FNAME(SpanLength); MOSES_FNAME(SyntaxRHS); + MOSES_FNAME(LBLLM); MOSES_FNAME(SkeletonStatelessFF); MOSES_FNAME(SkeletonStatefulFF); diff --git a/moses/FF/LBLLM.cpp b/moses/FF/LBLLM.cpp new file mode 100644 index 0000000000..c57d19acc0 --- /dev/null +++ b/moses/FF/LBLLM.cpp @@ -0,0 +1,131 @@ +#include +#include +#include "LBLLM.h" +#include "moses/ScoreComponentCollection.h" +#include "moses/Hypothesis.h" + +using namespace std; + +namespace Moses +{ +int LBLLMState::Compare(const FFState& other) const +{ + const LBLLMState &otherState = static_cast(other); + + if (m_targetLen == otherState.m_targetLen) + return 0; + return (m_targetLen < otherState.m_targetLen) ? -1 : +1; +} + +//////////////////////////////////////////////////////////////// +LBLLM::LBLLM(const std::string &line) + :StatefulFeatureFunction(3, line) +{ + ReadParameters(); +} + +void LBLLM::Load() +{ + { + cerr << "Reading LM from " << m_lmPath << " ...\n"; + //ifstream ifile(lm_file.c_str(), ios::in | ios::binary); + ifstream ifile(m_lmPath.c_str(), ios::in); + if (!ifile.good()) { + cerr << "Failed to open " << m_lmPath << " for reading\n"; + abort(); + } + boost::archive::text_iarchive ia(ifile); + ia >> lm; + dict = lm.label_set(); + } + /* + { + ifstream z_ifile((lm_file+".z").c_str(), ios::in); + if (!z_ifile.good()) { + cerr << "Failed to open " << (lm_file+".z") << " for reading\n"; + abort(); + } + cerr << "Reading LM Z from " << lm_file+".z" << " ...\n"; + boost::archive::text_iarchive ia(z_ifile); + ia >> z_approx; + } + */ + + cerr << "Initializing map contents (map size=" << dict.max() << ")\n"; + for (int i = 1; i < dict.max(); ++i) + AddToWordMap(i); + cerr << "Done.\n"; + ss_off = OrderToStateSize(kORDER)-1; // offset of "state size" member + 
FeatureFunction::SetStateSize(OrderToStateSize(kORDER)); + kSTART = dict.Convert(""); + kSTOP = dict.Convert(""); + kUNKNOWN = dict.Convert("_UNK_"); + kNONE = -1; + kSTAR = dict.Convert("<{STAR}>"); + last_id = 0; + + // optional online "adaptation" by training on previous references + if (reffile.size()) { + cerr << "Reference file: " << reffile << endl; + set rv; + oxlm::ReadFromFile(reffile, &dict, &ref_sents, &rv); + } + +} + +void LBLLM::EvaluateInIsolation(const Phrase &source + , const TargetPhrase &targetPhrase + , ScoreComponentCollection &scoreBreakdown + , ScoreComponentCollection &estimatedFutureScore) const +{} + +void LBLLM::EvaluateWithSourceContext(const InputType &input + , const InputPath &inputPath + , const TargetPhrase &targetPhrase + , const StackVec *stackVec + , ScoreComponentCollection &scoreBreakdown + , ScoreComponentCollection *estimatedFutureScore) const +{} + +FFState* LBLLM::EvaluateWhenApplied( + const Hypothesis& cur_hypo, + const FFState* prev_state, + ScoreComponentCollection* accumulator) const +{ + // dense scores + vector newScores(m_numScoreComponents); + newScores[0] = 1.5; + newScores[1] = 0.3; + newScores[2] = 0.4; + accumulator->PlusEquals(this, newScores); + + // sparse scores + accumulator->PlusEquals(this, "sparse-name", 2.4); + + // int targetLen = cur_hypo.GetCurrTargetPhrase().GetSize(); // ??? 
[UG] + return new LBLLMState(0); +} + +FFState* LBLLM::EvaluateWhenApplied( + const ChartHypothesis& /* cur_hypo */, + int /* featureID - used to index the state in the previous hypotheses */, + ScoreComponentCollection* accumulator) const +{ + return new LBLLMState(0); +} + +void LBLLM::SetParameter(const std::string& key, const std::string& value) +{ + if (key == "lm-file") { + m_lmPath = value; + } + else if (key == "ref-file") { + m_refPath = value; + } + else { + StatefulFeatureFunction::SetParameter(key, value); + } +} + +} + diff --git a/moses/FF/LBLLM.h b/moses/FF/LBLLM.h new file mode 100644 index 0000000000..3a667a0f87 --- /dev/null +++ b/moses/FF/LBLLM.h @@ -0,0 +1,65 @@ +#pragma once + +#include +#include "StatefulFeatureFunction.h" +#include "FFState.h" +#include "lbl/nlm.h" + +namespace Moses +{ + +class LBLLMState : public FFState +{ + int m_targetLen; +public: + LBLLMState(int targetLen) + {} + + int Compare(const FFState& other) const; +}; + +class LBLLM : public StatefulFeatureFunction +{ +public: + LBLLM(const std::string &line); + void Load(); + + bool IsUseable(const FactorMask &mask) const { + return true; + } + virtual const FFState* EmptyHypothesisState(const InputType &input) const { + return new LBLLMState(0); + } + + void EvaluateInIsolation(const Phrase &source + , const TargetPhrase &targetPhrase + , ScoreComponentCollection &scoreBreakdown + , ScoreComponentCollection &estimatedFutureScore) const; + void EvaluateWithSourceContext(const InputType &input + , const InputPath &inputPath + , const TargetPhrase &targetPhrase + , const StackVec *stackVec + , ScoreComponentCollection &scoreBreakdown + , ScoreComponentCollection *estimatedFutureScore = NULL) const; + FFState* EvaluateWhenApplied( + const Hypothesis& cur_hypo, + const FFState* prev_state, + ScoreComponentCollection* accumulator) const; + FFState* EvaluateWhenApplied( + const ChartHypothesis& /* cur_hypo */, + int /* featureID - used to index the state in the previous hypotheses 
*/, + ScoreComponentCollection* accumulator) const; + + void SetParameter(const std::string& key, const std::string& value); + +protected: + std::string m_lmPath, m_refPath; + + oxlm::Dict dict; + oxlm::FactoredOutputNLM lm; + +}; + + +} + From 5b2bafde7613fc8bbbf6e88563fc056f221535fa Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Thu, 10 Jul 2014 12:16:38 +0100 Subject: [PATCH 41/84] compiles with eclipse --- contrib/other-builds/moses/.cproject | 11 ++++----- contrib/other-builds/moses/.project | 20 ++++++++-------- moses/FF/Factory.cpp | 9 +++++-- moses/{FF => LM}/LBLLM.cpp | 35 ++-------------------------- moses/{FF => LM}/LBLLM.h | 7 ++++-- 5 files changed, 29 insertions(+), 53 deletions(-) rename moses/{FF => LM}/LBLLM.cpp (70%) rename moses/{FF => LM}/LBLLM.h (93%) diff --git a/contrib/other-builds/moses/.cproject b/contrib/other-builds/moses/.cproject index 409adfcc57..1a06fe7a8b 100644 --- a/contrib/other-builds/moses/.cproject +++ b/contrib/other-builds/moses/.cproject @@ -41,6 +41,8 @@ + + + @@ -81,10 +79,11 @@ - + + - + From f340ede6b58ae61d2586c9da3d32fbe6cd9bdda4 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Mon, 14 Jul 2014 19:15:20 +0100 Subject: [PATCH 49/84] new LBLLM based on LanguageModel class --- contrib/other-builds/CreateOnDiskPt/.cproject | 4 ++ contrib/other-builds/moses/.project | 10 ++++ moses/FF/Factory.cpp | 12 +++- moses/LM/oxlm/LBLLM2.cpp | 12 ++++ moses/LM/oxlm/LBLLM2.h | 58 +++++++++++++++++++ 5 files changed, 93 insertions(+), 3 deletions(-) create mode 100644 moses/LM/oxlm/LBLLM2.cpp create mode 100644 moses/LM/oxlm/LBLLM2.h diff --git a/contrib/other-builds/CreateOnDiskPt/.cproject b/contrib/other-builds/CreateOnDiskPt/.cproject index 4c46d70f86..e114255db2 100644 --- a/contrib/other-builds/CreateOnDiskPt/.cproject +++ b/contrib/other-builds/CreateOnDiskPt/.cproject @@ -42,6 +42,9 @@