From 381c3f64d8a8d397a4d6488c80e241a919682307 Mon Sep 17 00:00:00 2001 From: mliska Date: Tue, 21 Oct 2014 11:44:31 +0200 Subject: [PATCH 01/14] std::unordered_set replaces std::set for m_seenPosition. --- moses/BitmapContainer.cpp | 2 +- moses/BitmapContainer.h | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/moses/BitmapContainer.cpp b/moses/BitmapContainer.cpp index 061a5953f6..fd6fc1cd07 100644 --- a/moses/BitmapContainer.cpp +++ b/moses/BitmapContainer.cpp @@ -223,7 +223,7 @@ Hypothesis *BackwardsEdge::CreateHypothesis(const Hypothesis &hypothesis, const bool BackwardsEdge::SeenPosition(const size_t x, const size_t y) { - std::set< int >::iterator iter = m_seenPosition.find((x<<16) + y); + boost::unordered_set< int >::iterator iter = m_seenPosition.find((x<<16) + y); return (iter != m_seenPosition.end()); } diff --git a/moses/BitmapContainer.h b/moses/BitmapContainer.h index 51f1659ef6..5000590812 100644 --- a/moses/BitmapContainer.h +++ b/moses/BitmapContainer.h @@ -33,6 +33,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include "TypeDef.h" #include "WordsBitmap.h" +#include + namespace Moses { @@ -165,7 +167,7 @@ class BackwardsEdge const SquareMatrix &m_futurescore; std::vector< const Hypothesis* > m_hypotheses; - std::set< int > m_seenPosition; + boost::unordered_set< int > m_seenPosition; // We don't want to instantiate "empty" objects. BackwardsEdge(); From 0a264bf5e30f60ffbcb644de5fa35f7d3bfd6923 Mon Sep 17 00:00:00 2001 From: mliska Date: Mon, 20 Oct 2014 13:17:07 +0200 Subject: [PATCH 02/14] DistortionFunctionScorer dyn_cast removed in FF. 
--- moses/BitmapContainer.cpp | 16 ++++++---------- moses/FF/FeatureFunction.cpp | 6 ++++++ moses/FF/FeatureFunction.h | 7 +++++++ 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/moses/BitmapContainer.cpp b/moses/BitmapContainer.cpp index 061a5953f6..2a24aeb6eb 100644 --- a/moses/BitmapContainer.cpp +++ b/moses/BitmapContainer.cpp @@ -56,19 +56,15 @@ class HypothesisScoreOrdererWithDistortion m_transOptRange(transOptRange) { m_totalWeightDistortion = 0; const StaticData &staticData = StaticData::Instance(); - const std::vector &ffs = FeatureFunction::GetFeatureFunctions(); - std::vector::const_iterator iter; + + const std::vector &ffs = FeatureFunction::GetDistortionFeatureFunctions(); + std::vector::const_iterator iter; for (iter = ffs.begin(); iter != ffs.end(); ++iter) { - const FeatureFunction *ff = *iter; + const DistortionScoreProducer *ff = *iter; - const DistortionScoreProducer *model = dynamic_cast(ff); - if (model) { - float weight =staticData.GetAllWeights().GetScoreForProducer(model); - m_totalWeightDistortion += weight; - } + float weight =staticData.GetAllWeights().GetScoreForProducer(ff); + m_totalWeightDistortion += weight; } - - } const WordsRange* m_transOptRange; diff --git a/moses/FF/FeatureFunction.cpp b/moses/FF/FeatureFunction.cpp index 5d4e0f91e9..f83131c9d8 100644 --- a/moses/FF/FeatureFunction.cpp +++ b/moses/FF/FeatureFunction.cpp @@ -7,6 +7,7 @@ #include "moses/Manager.h" #include "moses/TranslationOption.h" #include "moses/Util.h" +#include "moses/FF/DistortionScoreProducer.h" using namespace std; @@ -16,6 +17,7 @@ namespace Moses multiset FeatureFunction::description_counts; std::vector FeatureFunction::s_staticColl; +std::vector FeatureFunction::s_staticCollDistortion; FeatureFunction &FeatureFunction::FindFeatureFunction(const std::string& name) { @@ -67,6 +69,10 @@ Initialize(const std::string &line) ScoreComponentCollection::RegisterScoreProducer(this); s_staticColl.push_back(this); + + const DistortionScoreProducer 
*distortion = dynamic_cast(this); + if(distortion) + s_staticCollDistortion.push_back (distortion); } FeatureFunction::~FeatureFunction() {} diff --git a/moses/FF/FeatureFunction.h b/moses/FF/FeatureFunction.h index b30815e05f..0903a6b18f 100644 --- a/moses/FF/FeatureFunction.h +++ b/moses/FF/FeatureFunction.h @@ -22,6 +22,7 @@ class WordsRange; class FactorMask; class InputPath; class StackVec; +class DistortionScoreProducer; /** base class for all feature functions. */ @@ -30,6 +31,7 @@ class FeatureFunction protected: /**< all the score producers in this run */ static std::vector s_staticColl; + static std::vector s_staticCollDistortion; std::string m_description, m_argLine; std::vector > m_args; @@ -45,6 +47,11 @@ class FeatureFunction static const std::vector& GetFeatureFunctions() { return s_staticColl; } + + static const std::vector& GetDistortionFeatureFunctions() { + return s_staticCollDistortion; + } + static FeatureFunction &FindFeatureFunction(const std::string& name); static void Destroy(); From e1e14a91eeae253d2b17d3150cad56967c44d2f1 Mon Sep 17 00:00:00 2001 From: Xiang Li Date: Mon, 1 Dec 2014 11:26:53 +0800 Subject: [PATCH 03/14] Update train-model.perl The default hmm iterations of GIZA++ is 5. Even though the "hmm-align" option is not set. The hmm align is also activated when using the training script. 
--- scripts/training/train-model.perl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/training/train-model.perl b/scripts/training/train-model.perl index 6dc65b8f3e..7fe524165d 100755 --- a/scripts/training/train-model.perl +++ b/scripts/training/train-model.perl @@ -1120,6 +1120,7 @@ sub run_single_giza { m2 => 0 , m3 => 3 , m4 => 3 , + hmmiterations => 0 , o => "giza" , nodumps => 1 , onlyaldumps => 1 , @@ -1141,7 +1142,6 @@ sub run_single_giza { if ($_HMM_ALIGN) { $GizaDefaultOptions{m3} = 0; $GizaDefaultOptions{m4} = 0; - $GizaDefaultOptions{hmmiterations} = 5; $GizaDefaultOptions{hmmdumpfrequency} = 5; $GizaDefaultOptions{nodumps} = 0; } From 030ea19e6c0e4f221982c7da05779c3949f5daa3 Mon Sep 17 00:00:00 2001 From: Phil Williams Date: Tue, 9 Dec 2014 15:47:55 +0000 Subject: [PATCH 04/14] Add Moses::Syntax::Manager class Sits between Moses::BaseManager and S2T::Manager, F2S::Manager, etc. --- moses/Syntax/KBestExtractor.h | 1 + moses/Syntax/Manager.cpp | 206 ++++++++++++++++++++++++++++++++ moses/Syntax/Manager.h | 58 +++++++++ moses/Syntax/S2T/Manager-inl.h | 212 ++------------------------------- moses/Syntax/S2T/Manager.h | 36 +----- moses/Syntax/S2T/OovHandler.h | 1 + 6 files changed, 280 insertions(+), 234 deletions(-) create mode 100644 moses/Syntax/Manager.cpp create mode 100644 moses/Syntax/Manager.h diff --git a/moses/Syntax/KBestExtractor.h b/moses/Syntax/KBestExtractor.h index 21fb6f7377..248d26c01a 100644 --- a/moses/Syntax/KBestExtractor.h +++ b/moses/Syntax/KBestExtractor.h @@ -5,6 +5,7 @@ #include #include +#include #include #include diff --git a/moses/Syntax/Manager.cpp b/moses/Syntax/Manager.cpp new file mode 100644 index 0000000000..e0b52f2f72 --- /dev/null +++ b/moses/Syntax/Manager.cpp @@ -0,0 +1,206 @@ +#include "Manager.h" + +#include + +#include "moses/OutputCollector.h" +#include "moses/StaticData.h" + +#include "PVertex.h" + +namespace Moses +{ +namespace Syntax +{ + +Manager::Manager(const InputType &source) + : 
Moses::BaseManager(source) +{ +} + +void Manager::OutputNBest(OutputCollector *collector) const +{ + if (collector) { + const StaticData &staticData = StaticData::Instance(); + long translationId = m_source.GetTranslationId(); + + KBestExtractor::KBestVec nBestList; + ExtractKBest(staticData.GetNBestSize(), nBestList, + staticData.GetDistinctNBest()); + OutputNBestList(collector, nBestList, translationId); + } +} + +void Manager::OutputUnknowns(OutputCollector *collector) const +{ + if (collector) { + long translationId = m_source.GetTranslationId(); + + std::ostringstream out; + for (std::set::const_iterator p = m_oovs.begin(); + p != m_oovs.end(); ++p) { + out << *p; + } + out << std::endl; + collector->Write(translationId, out.str()); + } +} + +void Manager::OutputNBestList(OutputCollector *collector, + const KBestExtractor::KBestVec &nBestList, + long translationId) const +{ + const StaticData &staticData = StaticData::Instance(); + + const std::vector &outputFactorOrder = + staticData.GetOutputFactorOrder(); + + std::ostringstream out; + + if (collector->OutputIsCout()) { + // Set precision only if we're writing the n-best list to cout. This is to + // preserve existing behaviour, but should probably be done either way. 
+ FixPrecision(out); + } + + bool includeWordAlignment = staticData.PrintAlignmentInfoInNbest(); + bool PrintNBestTrees = staticData.PrintNBestTrees(); + + for (KBestExtractor::KBestVec::const_iterator p = nBestList.begin(); + p != nBestList.end(); ++p) { + const KBestExtractor::Derivation &derivation = **p; + + // get the derivation's target-side yield + Phrase outputPhrase = KBestExtractor::GetOutputPhrase(derivation); + + // delete and + UTIL_THROW_IF2(outputPhrase.GetSize() < 2, + "Output phrase should have contained at least 2 words (beginning and end-of-sentence)"); + outputPhrase.RemoveWord(0); + outputPhrase.RemoveWord(outputPhrase.GetSize() - 1); + + // print the translation ID, surface factors, and scores + out << translationId << " ||| "; + OutputSurface(out, outputPhrase, outputFactorOrder, false); + out << " ||| "; + OutputAllFeatureScores(derivation.scoreBreakdown, out); + out << " ||| " << derivation.score; + + // optionally, print word alignments + if (includeWordAlignment) { + out << " ||| "; + Alignments align; + OutputAlignmentNBest(align, derivation, 0); + for (Alignments::const_iterator q = align.begin(); q != align.end(); + ++q) { + out << q->first << "-" << q->second << " "; + } + } + + // optionally, print tree + if (PrintNBestTrees) { + TreePointer tree = KBestExtractor::GetOutputTree(derivation); + out << " ||| " << tree->GetString(); + } + + out << std::endl; + } + + assert(collector); + collector->Write(translationId, out.str()); +} + +std::size_t Manager::OutputAlignmentNBest( + Alignments &retAlign, + const KBestExtractor::Derivation &derivation, + std::size_t startTarget) const +{ + const SHyperedge ­peredge = derivation.edge->shyperedge; + + std::size_t totalTargetSize = 0; + std::size_t startSource = shyperedge.head->pvertex->span.GetStartPos(); + + const TargetPhrase &tp = *(shyperedge.translation); + + std::size_t thisSourceSize = CalcSourceSize(derivation); + + // position of each terminal word in translation rule, irrespective 
of + // alignment if non-term, number is undefined + std::vector sourceOffsets(thisSourceSize, 0); + std::vector targetOffsets(tp.GetSize(), 0); + + const AlignmentInfo &aiNonTerm = shyperedge.translation->GetAlignNonTerm(); + std::vector sourceInd2pos = aiNonTerm.GetSourceIndex2PosMap(); + const AlignmentInfo::NonTermIndexMap &targetPos2SourceInd = + aiNonTerm.GetNonTermIndexMap(); + + UTIL_THROW_IF2(sourceInd2pos.size() != derivation.subderivations.size(), + "Error"); + + std::size_t targetInd = 0; + for (std::size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) { + if (tp.GetWord(targetPos).IsNonTerminal()) { + UTIL_THROW_IF2(targetPos >= targetPos2SourceInd.size(), "Error"); + std::size_t sourceInd = targetPos2SourceInd[targetPos]; + std::size_t sourcePos = sourceInd2pos[sourceInd]; + + const KBestExtractor::Derivation &subderivation = + *derivation.subderivations[sourceInd]; + + // calc source size + std::size_t sourceSize = + subderivation.edge->head->svertex.pvertex->span.GetNumWordsCovered(); + sourceOffsets[sourcePos] = sourceSize; + + // calc target size. 
+ // Recursively look thru child hypos + std::size_t currStartTarget = startTarget + totalTargetSize; + std::size_t targetSize = OutputAlignmentNBest(retAlign, subderivation, + currStartTarget); + targetOffsets[targetPos] = targetSize; + + totalTargetSize += targetSize; + ++targetInd; + } else { + ++totalTargetSize; + } + } + + // convert position within translation rule to absolute position within + // source sentence / output sentence + ShiftOffsets(sourceOffsets, startSource); + ShiftOffsets(targetOffsets, startTarget); + + // get alignments from this hypo + const AlignmentInfo &aiTerm = shyperedge.translation->GetAlignTerm(); + + // add to output arg, offsetting by source & target + AlignmentInfo::const_iterator iter; + for (iter = aiTerm.begin(); iter != aiTerm.end(); ++iter) { + const std::pair &align = *iter; + std::size_t relSource = align.first; + std::size_t relTarget = align.second; + std::size_t absSource = sourceOffsets[relSource]; + std::size_t absTarget = targetOffsets[relTarget]; + + std::pair alignPoint(absSource, absTarget); + std::pair ret = retAlign.insert(alignPoint); + UTIL_THROW_IF2(!ret.second, "Error"); + } + + return totalTargetSize; +} + +std::size_t Manager::CalcSourceSize(const KBestExtractor::Derivation &d) const +{ + const SHyperedge ­peredge = d.edge->shyperedge; + std::size_t ret = shyperedge.head->pvertex->span.GetNumWordsCovered(); + for (std::size_t i = 0; i < shyperedge.tail.size(); ++i) { + std::size_t childSize = + shyperedge.tail[i]->pvertex->span.GetNumWordsCovered(); + ret -= (childSize - 1); + } + return ret; +} + +} // Syntax +} // Moses diff --git a/moses/Syntax/Manager.h b/moses/Syntax/Manager.h new file mode 100644 index 0000000000..3216a9aa41 --- /dev/null +++ b/moses/Syntax/Manager.h @@ -0,0 +1,58 @@ +#pragma once + +#include "moses/InputType.h" +#include "moses/BaseManager.h" + +#include "KBestExtractor.h" + +namespace Moses +{ +namespace Syntax +{ + +// Common base class for Moses::Syntax managers. 
+class Manager : public BaseManager +{ + public: + Manager(const InputType &); + + // Virtual functions from Moses::BaseManager that are implemented the same + // way for all Syntax managers. + void OutputNBest(OutputCollector *collector) const; + void OutputUnknowns(OutputCollector *collector) const; + + // Virtual functions from Moses::BaseManager that are no-ops for all Syntax + // managers. + void OutputLatticeSamples(OutputCollector *collector) const {} + void OutputAlignment(OutputCollector *collector) const {} + void OutputDetailedTreeFragmentsTranslationReport( + OutputCollector *collector) const {} + void OutputWordGraph(OutputCollector *collector) const {} + void OutputSearchGraph(OutputCollector *collector) const {} + void OutputSearchGraphSLF() const {} + void OutputSearchGraphHypergraph() const {} + + // Syntax-specific virtual functions that derived classes must implement. + virtual void ExtractKBest( + std::size_t k, + std::vector > &kBestList, + bool onlyDistinct=false) const = 0; + + protected: + std::set m_oovs; + + private: + // Syntax-specific helper functions used to implement OutputNBest. 
+ void OutputNBestList(OutputCollector *collector, + const KBestExtractor::KBestVec &nBestList, + long translationId) const; + + std::size_t OutputAlignmentNBest(Alignments &retAlign, + const KBestExtractor::Derivation &d, + std::size_t startTarget) const; + + std::size_t CalcSourceSize(const KBestExtractor::Derivation &d) const; +}; + +} // Syntax +} // Moses diff --git a/moses/Syntax/S2T/Manager-inl.h b/moses/Syntax/S2T/Manager-inl.h index 3351d1a9fe..4d0136fe1e 100644 --- a/moses/Syntax/S2T/Manager-inl.h +++ b/moses/Syntax/S2T/Manager-inl.h @@ -2,6 +2,7 @@ #include #include + #include "moses/DecodeGraph.h" #include "moses/StaticData.h" #include "moses/Syntax/BoundedPriorityContainer.h" @@ -14,8 +15,8 @@ #include "moses/Syntax/SVertexRecombinationOrderer.h" #include "moses/Syntax/SymbolEqualityPred.h" #include "moses/Syntax/SymbolHasher.h" -#include "DerivationWriter.h" +#include "DerivationWriter.h" #include "OovHandler.h" #include "PChart.h" #include "RuleTrie.h" @@ -30,7 +31,7 @@ namespace S2T template Manager::Manager(const InputType &source) - : BaseManager(source) + : Syntax::Manager(source) , m_pchart(source.GetSize(), Parser::RequiresCompressedChart()) , m_schart(source.GetSize()) { @@ -44,7 +45,7 @@ void Manager::InitializeCharts() const Word &terminal = m_source.GetWord(i); // PVertex - PVertex tmp(WordsRange(i,i), m_source.GetWord(i)); + PVertex tmp(WordsRange(i,i), terminal); PVertex &pvertex = m_pchart.AddVertex(tmp); // SVertex @@ -262,6 +263,7 @@ const SHyperedge *Manager::GetBestSHyperedge() const } assert(stacks.Size() == 1); const std::vector > &stack = stacks.Begin()->second; + // TODO Throw exception if stack is empty? Or return 0? return stack[0]->best; } @@ -284,6 +286,7 @@ void Manager::ExtractKBest( } assert(stacks.Size() == 1); const std::vector > &stack = stacks.Begin()->second; + // TODO Throw exception if stack is empty? Or return 0? 
KBestExtractor extractor; @@ -386,212 +389,17 @@ void Manager::RecombineAndSort(const std::vector &buffer, } template -void Manager::OutputNBest(OutputCollector *collector) const -{ - if (collector) { - const StaticData &staticData = StaticData::Instance(); - long translationId = m_source.GetTranslationId(); - - Syntax::KBestExtractor::KBestVec nBestList; - ExtractKBest(staticData.GetNBestSize(), nBestList, - staticData.GetDistinctNBest()); - OutputNBestList(collector, nBestList, translationId); - } - -} - - -template -void Manager::OutputDetailedTranslationReport(OutputCollector *collector) const +void Manager::OutputDetailedTranslationReport( + OutputCollector *collector) const { const SHyperedge *best = GetBestSHyperedge(); if (best == NULL || collector == NULL) { - return; + return; } - long translationId = m_source.GetTranslationId(); std::ostringstream out; - Syntax::S2T::DerivationWriter::Write(*best, translationId, out); + DerivationWriter::Write(*best, translationId, out); collector->Write(translationId, out.str()); - -} - -template -void Manager::OutputUnknowns(OutputCollector *collector) const -{ - if (collector) { - long translationId = m_source.GetTranslationId(); - - std::ostringstream out; - for (std::set::const_iterator p = m_oovs.begin(); - p != m_oovs.end(); ++p) { - out << *p; - } - out << std::endl; - collector->Write(translationId, out.str()); - } - -} - -template -void Manager::OutputNBestList(OutputCollector *collector, - const Syntax::KBestExtractor::KBestVec &nBestList, - long translationId) const -{ - const StaticData &staticData = StaticData::Instance(); - - const std::vector &outputFactorOrder = staticData.GetOutputFactorOrder(); - - std::ostringstream out; - - if (collector->OutputIsCout()) { - // Set precision only if we're writing the n-best list to cout. This is to - // preserve existing behaviour, but should probably be done either way. 
- FixPrecision(out); - } - - bool includeWordAlignment = - staticData.PrintAlignmentInfoInNbest(); - - bool PrintNBestTrees = StaticData::Instance().PrintNBestTrees(); - - for (Syntax::KBestExtractor::KBestVec::const_iterator p = nBestList.begin(); - p != nBestList.end(); ++p) { - const Syntax::KBestExtractor::Derivation &derivation = **p; - - // get the derivation's target-side yield - Phrase outputPhrase = Syntax::KBestExtractor::GetOutputPhrase(derivation); - - // delete and - UTIL_THROW_IF2(outputPhrase.GetSize() < 2, - "Output phrase should have contained at least 2 words (beginning and end-of-sentence)"); - outputPhrase.RemoveWord(0); - outputPhrase.RemoveWord(outputPhrase.GetSize() - 1); - - // print the translation ID, surface factors, and scores - out << translationId << " ||| "; - OutputSurface(out, outputPhrase, outputFactorOrder, false); - out << " ||| "; - OutputAllFeatureScores(derivation.scoreBreakdown, out); - out << " ||| " << derivation.score; - - // optionally, print word alignments - if (includeWordAlignment) { - out << " ||| "; - Alignments align; - OutputAlignmentNBest(align, derivation, 0); - for (Alignments::const_iterator q = align.begin(); q != align.end(); - ++q) { - out << q->first << "-" << q->second << " "; - } - } - - // optionally, print tree - if (PrintNBestTrees) { - TreePointer tree = Syntax::KBestExtractor::GetOutputTree(derivation); - out << " ||| " << tree->GetString(); - } - - out << std::endl; - } - - assert(collector); - collector->Write(translationId, out.str()); -} - -template -size_t Manager::OutputAlignmentNBest( - Alignments &retAlign, - const Syntax::KBestExtractor::Derivation &derivation, - size_t startTarget) const -{ - const Syntax::SHyperedge ­peredge = derivation.edge->shyperedge; - - size_t totalTargetSize = 0; - size_t startSource = shyperedge.head->pvertex->span.GetStartPos(); - - const TargetPhrase &tp = *(shyperedge.translation); - - size_t thisSourceSize = CalcSourceSize(derivation); - - // position of each 
terminal word in translation rule, irrespective of alignment - // if non-term, number is undefined - std::vector sourceOffsets(thisSourceSize, 0); - std::vector targetOffsets(tp.GetSize(), 0); - - const AlignmentInfo &aiNonTerm = shyperedge.translation->GetAlignNonTerm(); - std::vector sourceInd2pos = aiNonTerm.GetSourceIndex2PosMap(); - const AlignmentInfo::NonTermIndexMap &targetPos2SourceInd = aiNonTerm.GetNonTermIndexMap(); - - UTIL_THROW_IF2(sourceInd2pos.size() != derivation.subderivations.size(), - "Error"); - - size_t targetInd = 0; - for (size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) { - if (tp.GetWord(targetPos).IsNonTerminal()) { - UTIL_THROW_IF2(targetPos >= targetPos2SourceInd.size(), "Error"); - size_t sourceInd = targetPos2SourceInd[targetPos]; - size_t sourcePos = sourceInd2pos[sourceInd]; - - const Moses::Syntax::KBestExtractor::Derivation &subderivation = - *derivation.subderivations[sourceInd]; - - // calc source size - size_t sourceSize = - subderivation.edge->head->svertex.pvertex->span.GetNumWordsCovered(); - sourceOffsets[sourcePos] = sourceSize; - - // calc target size. 
- // Recursively look thru child hypos - size_t currStartTarget = startTarget + totalTargetSize; - size_t targetSize = OutputAlignmentNBest(retAlign, subderivation, - currStartTarget); - targetOffsets[targetPos] = targetSize; - - totalTargetSize += targetSize; - ++targetInd; - } else { - ++totalTargetSize; - } - } - - // convert position within translation rule to absolute position within - // source sentence / output sentence - ShiftOffsets(sourceOffsets, startSource); - ShiftOffsets(targetOffsets, startTarget); - - // get alignments from this hypo - const AlignmentInfo &aiTerm = shyperedge.translation->GetAlignTerm(); - - // add to output arg, offsetting by source & target - AlignmentInfo::const_iterator iter; - for (iter = aiTerm.begin(); iter != aiTerm.end(); ++iter) { - const std::pair &align = *iter; - size_t relSource = align.first; - size_t relTarget = align.second; - size_t absSource = sourceOffsets[relSource]; - size_t absTarget = targetOffsets[relTarget]; - - std::pair alignPoint(absSource, absTarget); - std::pair ret = retAlign.insert(alignPoint); - UTIL_THROW_IF2(!ret.second, "Error"); - } - - return totalTargetSize; -} - -template -size_t Manager::CalcSourceSize(const Syntax::KBestExtractor::Derivation &d) const -{ - using namespace Moses::Syntax; - - const Syntax::SHyperedge ­peredge = d.edge->shyperedge; - size_t ret = shyperedge.head->pvertex->span.GetNumWordsCovered(); - for (size_t i = 0; i < shyperedge.tail.size(); ++i) { - size_t childSize = shyperedge.tail[i]->pvertex->span.GetNumWordsCovered(); - ret -= (childSize - 1); - } - return ret; } } // S2T diff --git a/moses/Syntax/S2T/Manager.h b/moses/Syntax/S2T/Manager.h index 096e3c142a..47cca43d6e 100644 --- a/moses/Syntax/S2T/Manager.h +++ b/moses/Syntax/S2T/Manager.h @@ -1,13 +1,15 @@ #pragma once +#include #include #include #include "moses/InputType.h" -#include "moses/BaseManager.h" #include "moses/Syntax/KBestExtractor.h" +#include "moses/Syntax/Manager.h" #include 
"moses/Syntax/SVertexStack.h" +#include "moses/Word.h" #include "OovHandler.h" #include "ParserCallback.h" @@ -19,14 +21,13 @@ namespace Moses namespace Syntax { -class SDerivation; struct SHyperedge; namespace S2T { template -class Manager : public BaseManager +class Manager : public Syntax::Manager { public: Manager(const InputType &); @@ -41,25 +42,7 @@ class Manager : public BaseManager std::vector > &kBestList, bool onlyDistinct=false) const; - const std::set &GetUnknownWords() const { return m_oovs; } - - void OutputNBest(OutputCollector *collector) const; - void OutputLatticeSamples(OutputCollector *collector) const - {} - void OutputAlignment(OutputCollector *collector) const - {} void OutputDetailedTranslationReport(OutputCollector *collector) const; - void OutputUnknowns(OutputCollector *collector) const; - void OutputDetailedTreeFragmentsTranslationReport(OutputCollector *collector) const - {} - void OutputWordGraph(OutputCollector *collector) const - {} - void OutputSearchGraph(OutputCollector *collector) const - {} - void OutputSearchGraphSLF() const - {} - void OutputSearchGraphHypergraph() const - {} private: void FindOovs(const PChart &, std::set &, std::size_t); @@ -74,19 +57,8 @@ class Manager : public BaseManager PChart m_pchart; SChart m_schart; - std::set m_oovs; boost::shared_ptr m_oovRuleTrie; std::vector > m_parsers; - - // output - void OutputNBestList(OutputCollector *collector, - const Moses::Syntax::KBestExtractor::KBestVec &nBestList, - long translationId) const; - std::size_t OutputAlignmentNBest(Alignments &retAlign, - const Moses::Syntax::KBestExtractor::Derivation &derivation, - std::size_t startTarget) const; - size_t CalcSourceSize(const Syntax::KBestExtractor::Derivation &d) const; - }; } // S2T diff --git a/moses/Syntax/S2T/OovHandler.h b/moses/Syntax/S2T/OovHandler.h index b74e697c54..4b01334139 100644 --- a/moses/Syntax/S2T/OovHandler.h +++ b/moses/Syntax/S2T/OovHandler.h @@ -4,6 +4,7 @@ #include +#include "moses/Phrase.h" 
#include "moses/Syntax/RuleTableFF.h" #include "moses/TargetPhrase.h" #include "moses/Word.h" From a4c9e5f9200be7f515441b9356e0e1b03ae2b596 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 9 Dec 2014 17:58:23 +0000 Subject: [PATCH 05/14] eclipse --- contrib/other-builds/moses/.project | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/contrib/other-builds/moses/.project b/contrib/other-builds/moses/.project index d26a65d862..d71f702038 100644 --- a/contrib/other-builds/moses/.project +++ b/contrib/other-builds/moses/.project @@ -1855,6 +1855,16 @@ 1 PARENT-3-PROJECT_LOC/moses/Syntax/KBestExtractor.h + + Syntax/Manager.cpp + 1 + PARENT-3-PROJECT_LOC/moses/Syntax/Manager.cpp + + + Syntax/Manager.h + 1 + PARENT-3-PROJECT_LOC/moses/Syntax/Manager.h + Syntax/NonTerminalMap.h 1 From fad3ef687a2ed3c40c6da5e9909a9576cb5cb3a3 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 10 Dec 2014 11:01:56 +0000 Subject: [PATCH 06/14] move OutputBest() to Syntax::Manager --- contrib/other-builds/consolidate/.cproject | 6 ++-- moses/IOWrapper.cpp | 33 --------------------- moses/IOWrapper.h | 1 - moses/Syntax/S2T/Manager-inl.h | 34 ++++++++++++++++++++++ moses/Syntax/S2T/Manager.h | 1 + moses/TranslationTask.h | 4 +-- 6 files changed, 41 insertions(+), 38 deletions(-) diff --git a/contrib/other-builds/consolidate/.cproject b/contrib/other-builds/consolidate/.cproject index 4593957dca..c40df5b28f 100644 --- a/contrib/other-builds/consolidate/.cproject +++ b/contrib/other-builds/consolidate/.cproject @@ -5,12 +5,12 @@ - + @@ -48,6 +48,7 @@ + @@ -86,12 +87,12 @@ - + @@ -156,4 +157,5 @@ + diff --git a/moses/IOWrapper.cpp b/moses/IOWrapper.cpp index ee7b12db5e..ee736585c0 100644 --- a/moses/IOWrapper.cpp +++ b/moses/IOWrapper.cpp @@ -872,38 +872,5 @@ void IOWrapper::OutputLatticeMBRNBestList(const vector& solu OutputLatticeMBRNBest(*m_nBestStream, solutions,translationId); } -//////////////////////////// -#include "moses/Syntax/PVertex.h" -#include 
"moses/Syntax/S2T/DerivationWriter.h" - -void IOWrapper::OutputBestHypo(const Syntax::SHyperedge *best, - long translationId) -{ - if (!m_singleBestOutputCollector) { - return; - } - std::ostringstream out; - FixPrecision(out); - if (best == NULL) { - VERBOSE(1, "NO BEST TRANSLATION" << std::endl); - if (StaticData::Instance().GetOutputHypoScore()) { - out << "0 "; - } - } else { - if (StaticData::Instance().GetOutputHypoScore()) { - out << best->score << " "; - } - Phrase yield = Syntax::GetOneBestTargetYield(*best); - // delete 1st & last - UTIL_THROW_IF2(yield.GetSize() < 2, - "Output phrase should have contained at least 2 words (beginning and end-of-sentence)"); - yield.RemoveWord(0); - yield.RemoveWord(yield.GetSize()-1); - out << yield.GetStringRep(StaticData::Instance().GetOutputFactorOrder()); - out << '\n'; - } - m_singleBestOutputCollector->Write(translationId, out.str()); -} - } // namespace diff --git a/moses/IOWrapper.h b/moses/IOWrapper.h index 9bcd387d8f..e0848f4331 100644 --- a/moses/IOWrapper.h +++ b/moses/IOWrapper.h @@ -173,7 +173,6 @@ class IOWrapper // CHART void OutputBestHypo(const Moses::ChartHypothesis *hypo, long translationId); void OutputBestHypo(search::Applied applied, long translationId); - void OutputBestHypo(const Moses::Syntax::SHyperedge *, long translationId); void OutputBestNone(long translationId); diff --git a/moses/Syntax/S2T/Manager-inl.h b/moses/Syntax/S2T/Manager-inl.h index 4d0136fe1e..46b2ab91c4 100644 --- a/moses/Syntax/S2T/Manager-inl.h +++ b/moses/Syntax/S2T/Manager-inl.h @@ -388,6 +388,40 @@ void Manager::RecombineAndSort(const std::vector &buffer, std::sort(stack.begin(), stack.end(), SVertexStackContentOrderer()); } +template +void Manager::OutputBest(OutputCollector *collector) const +{ + if (!collector) { + return; + } + + const Syntax::SHyperedge *best = GetBestSHyperedge(); + const long translationId = m_source.GetTranslationId(); + + std::ostringstream out; + FixPrecision(out); + if (best == NULL) { + 
VERBOSE(1, "NO BEST TRANSLATION" << std::endl); + if (StaticData::Instance().GetOutputHypoScore()) { + out << "0 "; + } + } else { + if (StaticData::Instance().GetOutputHypoScore()) { + out << best->score << " "; + } + Phrase yield = Syntax::GetOneBestTargetYield(*best); + // delete 1st & last + UTIL_THROW_IF2(yield.GetSize() < 2, + "Output phrase should have contained at least 2 words (beginning and end-of-sentence)"); + yield.RemoveWord(0); + yield.RemoveWord(yield.GetSize()-1); + out << yield.GetStringRep(StaticData::Instance().GetOutputFactorOrder()); + out << '\n'; + } + collector->Write(translationId, out.str()); + +} + template void Manager::OutputDetailedTranslationReport( OutputCollector *collector) const diff --git a/moses/Syntax/S2T/Manager.h b/moses/Syntax/S2T/Manager.h index 47cca43d6e..82eaf90e7c 100644 --- a/moses/Syntax/S2T/Manager.h +++ b/moses/Syntax/S2T/Manager.h @@ -42,6 +42,7 @@ class Manager : public Syntax::Manager std::vector > &kBestList, bool onlyDistinct=false) const; + void OutputBest(OutputCollector *collector) const; void OutputDetailedTranslationReport(OutputCollector *collector) const; private: diff --git a/moses/TranslationTask.h b/moses/TranslationTask.h index 217ffce009..0225f75b08 100644 --- a/moses/TranslationTask.h +++ b/moses/TranslationTask.h @@ -51,8 +51,8 @@ class TranslationTask : public Moses::Task Syntax::S2T::Manager manager(*m_source); manager.Decode(); // 1-best - const Syntax::SHyperedge *best = manager.GetBestSHyperedge(); - m_ioWrapper.OutputBestHypo(best, translationId); + manager.OutputBest(m_ioWrapper.GetSingleBestOutputCollector()); + // n-best manager.OutputNBest(m_ioWrapper.GetNBestOutputCollector()); From 99cfba8769cc84416c953693f7ccdf98c688423e Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 10 Dec 2014 11:28:47 +0000 Subject: [PATCH 07/14] move OutputBest() to Incremental::Manager --- moses/Incremental.cpp | 46 +++++++++++++++++++++++++++++++++++++++ moses/Incremental.h | 3 +++ 
moses/TranslationTask.cpp | 14 +++--------- 3 files changed, 52 insertions(+), 11 deletions(-) diff --git a/moses/Incremental.cpp b/moses/Incremental.cpp index 51806656ca..e54591fc69 100644 --- a/moses/Incremental.cpp +++ b/moses/Incremental.cpp @@ -283,6 +283,20 @@ const std::vector &Manager::GetNBest() const return *completed_nbest_; } +void Manager::OutputBest(OutputCollector *collector) const +{ + const long translationId = m_source.GetTranslationId(); + const std::vector &nbest = GetNBest(); + if (!nbest.empty()) { + OutputBestHypo(collector, nbest[0], translationId); + } + else { + OutputBestNone(collector, translationId); + } + +} + + void Manager::OutputNBest(OutputCollector *collector) const { if (collector == NULL) { @@ -465,6 +479,38 @@ void Manager::OutputTreeFragmentsTranslationOptions(std::ostream &out, } } +void Manager::OutputBestHypo(OutputCollector *collector, search::Applied applied, long translationId) const +{ + if (collector == NULL) return; + std::ostringstream out; + FixPrecision(out); + if (StaticData::Instance().GetOutputHypoScore()) { + out << applied.GetScore() << ' '; + } + Phrase outPhrase; + Incremental::ToPhrase(applied, outPhrase); + // delete 1st & last + UTIL_THROW_IF2(outPhrase.GetSize() < 2, + "Output phrase should have contained at least 2 words (beginning and end-of-sentence)"); + outPhrase.RemoveWord(0); + outPhrase.RemoveWord(outPhrase.GetSize() - 1); + out << outPhrase.GetStringRep(StaticData::Instance().GetOutputFactorOrder()); + out << '\n'; + collector->Write(translationId, out.str()); + + VERBOSE(1,"BEST TRANSLATION: " << outPhrase << "[total=" << applied.GetScore() << "]" << std::endl); +} + +void Manager::OutputBestNone(OutputCollector *collector, long translationId) const +{ + if (collector == NULL) return; + if (StaticData::Instance().GetOutputHypoScore()) { + collector->Write(translationId, "0 \n"); + } else { + collector->Write(translationId, "\n"); + } +} + namespace { diff --git a/moses/Incremental.h 
b/moses/Incremental.h index 1115884eea..1f5eba86a9 100644 --- a/moses/Incremental.h +++ b/moses/Incremental.h @@ -40,6 +40,7 @@ class Manager : public BaseManager } // output + void OutputBest(OutputCollector *collector) const; void OutputNBest(OutputCollector *collector) const; void OutputDetailedTranslationReport(OutputCollector *collector) const; void OutputNBestList(OutputCollector *collector, const std::vector &nbest, long translationId) const; @@ -98,6 +99,8 @@ class Manager : public BaseManager const search::Applied *applied, const Sentence &sentence, long translationId) const; + void OutputBestHypo(OutputCollector *collector, search::Applied applied, long translationId) const; + void OutputBestNone(OutputCollector *collector, long translationId) const; }; diff --git a/moses/TranslationTask.cpp b/moses/TranslationTask.cpp index d8b448d447..2a4c9fd486 100644 --- a/moses/TranslationTask.cpp +++ b/moses/TranslationTask.cpp @@ -268,17 +268,9 @@ void TranslationTask::RunChart() if (staticData.GetSearchAlgorithm() == ChartIncremental) { Incremental::Manager manager(*m_source); manager.Decode(); - const std::vector &nbest = manager.GetNBest(); - if (!nbest.empty()) { - m_ioWrapper.OutputBestHypo(nbest[0], translationId); - - manager.OutputDetailedTranslationReport(m_ioWrapper.GetDetailedTranslationCollector()); - manager.OutputDetailedTreeFragmentsTranslationReport(m_ioWrapper.GetDetailTreeFragmentsOutputCollector()); - - } else { - m_ioWrapper.OutputBestNone(translationId); - } - + manager.OutputBest(m_ioWrapper.GetSingleBestOutputCollector()); + manager.OutputDetailedTranslationReport(m_ioWrapper.GetDetailedTranslationCollector()); + manager.OutputDetailedTreeFragmentsTranslationReport(m_ioWrapper.GetDetailTreeFragmentsOutputCollector()); manager.OutputNBest(m_ioWrapper.GetNBestOutputCollector()); return; From 81640945d09b22ff297673c9c685cab9f344b266 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 10 Dec 2014 12:47:23 +0000 Subject: [PATCH 08/14] move 
OutputBest() to ChartManager --- moses/ChartManager.cpp | 67 ++++++++++++++++++++++++++++ moses/ChartManager.h | 3 ++ moses/IOWrapper.cpp | 91 --------------------------------------- moses/IOWrapper.h | 5 --- moses/Manager.cpp | 5 +++ moses/Manager.h | 1 + moses/TranslationTask.cpp | 6 ++- 7 files changed, 80 insertions(+), 98 deletions(-) diff --git a/moses/ChartManager.cpp b/moses/ChartManager.cpp index 568920b27c..55707b5306 100644 --- a/moses/ChartManager.cpp +++ b/moses/ChartManager.cpp @@ -300,6 +300,16 @@ void ChartManager::OutputSearchGraphMoses(std::ostream &outputSearchGraphStream) WriteSearchGraph(writer); } +void ChartManager::OutputBest(OutputCollector *collector) const +{ + const ChartHypothesis *bestHypo = GetBestHypothesis(); + if (collector && bestHypo) { + const size_t translationId = m_source.GetTranslationId(); + const ChartHypothesis *bestHypo = GetBestHypothesis(); + OutputBestHypo(collector, bestHypo, translationId); + } +} + void ChartManager::OutputNBest(OutputCollector *collector) const { const StaticData &staticData = StaticData::Instance(); @@ -807,4 +817,61 @@ void ChartManager::OutputSearchGraphHypergraph() const } } +void ChartManager::OutputBestHypo(OutputCollector *collector, const ChartHypothesis *hypo, long translationId) const +{ + if (!collector) + return; + std::ostringstream out; + FixPrecision(out); + if (hypo != NULL) { + VERBOSE(1,"BEST TRANSLATION: " << *hypo << endl); + VERBOSE(3,"Best path: "); + Backtrack(hypo); + VERBOSE(3,"0" << std::endl); + + if (StaticData::Instance().GetOutputHypoScore()) { + out << hypo->GetTotalScore() << " "; + } + + if (StaticData::Instance().IsPathRecoveryEnabled()) { + out << "||| "; + } + Phrase outPhrase(ARRAY_SIZE_INCR); + hypo->GetOutputPhrase(outPhrase); + + // delete 1st & last + UTIL_THROW_IF2(outPhrase.GetSize() < 2, + "Output phrase should have contained at least 2 words (beginning and end-of-sentence)"); + + outPhrase.RemoveWord(0); + outPhrase.RemoveWord(outPhrase.GetSize() - 1); 
+ + const std::vector outputFactorOrder = StaticData::Instance().GetOutputFactorOrder(); + string output = outPhrase.GetStringRep(outputFactorOrder); + out << output << endl; + } else { + VERBOSE(1, "NO BEST TRANSLATION" << endl); + + if (StaticData::Instance().GetOutputHypoScore()) { + out << "0 "; + } + + out << endl; + } + collector->Write(translationId, out.str()); +} + +void ChartManager::Backtrack(const ChartHypothesis *hypo) const +{ + const vector &prevHypos = hypo->GetPrevHypos(); + + vector::const_iterator iter; + for (iter = prevHypos.begin(); iter != prevHypos.end(); ++iter) { + const ChartHypothesis *prevHypo = *iter; + + VERBOSE(3,prevHypo->GetId() << " <= "); + Backtrack(prevHypo); + } +} + } // namespace Moses diff --git a/moses/ChartManager.h b/moses/ChartManager.h index a4f27750e9..8f32a36dd8 100644 --- a/moses/ChartManager.h +++ b/moses/ChartManager.h @@ -99,6 +99,8 @@ class ChartManager : public BaseManager const std::vector > &nBestList, const Sentence &sentence, long translationId) const; + void OutputBestHypo(OutputCollector *collector, const ChartHypothesis *hypo, long translationId) const; + void Backtrack(const ChartHypothesis *hypo) const; public: ChartManager(InputType const& source); @@ -143,6 +145,7 @@ class ChartManager : public BaseManager const ChartParser &GetParser() const { return m_parser; } // outputs + void OutputBest(OutputCollector *collector) const; void OutputNBest(OutputCollector *collector) const; void OutputLatticeSamples(OutputCollector *collector) const {} diff --git a/moses/IOWrapper.cpp b/moses/IOWrapper.cpp index ee736585c0..a4abbe1ef6 100644 --- a/moses/IOWrapper.cpp +++ b/moses/IOWrapper.cpp @@ -278,97 +278,6 @@ std::map IOWrapper::GetPlaceholders(const Hypothesis &hyp return ret; } - -void IOWrapper::OutputBestHypo(const ChartHypothesis *hypo, long translationId) -{ - if (!m_singleBestOutputCollector) - return; - std::ostringstream out; - FixPrecision(out); - if (hypo != NULL) { - VERBOSE(1,"BEST TRANSLATION: " 
<< *hypo << endl); - VERBOSE(3,"Best path: "); - Backtrack(hypo); - VERBOSE(3,"0" << std::endl); - - if (StaticData::Instance().GetOutputHypoScore()) { - out << hypo->GetTotalScore() << " "; - } - - if (StaticData::Instance().IsPathRecoveryEnabled()) { - out << "||| "; - } - Phrase outPhrase(ARRAY_SIZE_INCR); - hypo->GetOutputPhrase(outPhrase); - - // delete 1st & last - UTIL_THROW_IF2(outPhrase.GetSize() < 2, - "Output phrase should have contained at least 2 words (beginning and end-of-sentence)"); - - outPhrase.RemoveWord(0); - outPhrase.RemoveWord(outPhrase.GetSize() - 1); - - const std::vector outputFactorOrder = StaticData::Instance().GetOutputFactorOrder(); - string output = outPhrase.GetStringRep(outputFactorOrder); - out << output << endl; - } else { - VERBOSE(1, "NO BEST TRANSLATION" << endl); - - if (StaticData::Instance().GetOutputHypoScore()) { - out << "0 "; - } - - out << endl; - } - m_singleBestOutputCollector->Write(translationId, out.str()); -} - -void IOWrapper::OutputBestHypo(search::Applied applied, long translationId) -{ - if (!m_singleBestOutputCollector) return; - std::ostringstream out; - FixPrecision(out); - if (StaticData::Instance().GetOutputHypoScore()) { - out << applied.GetScore() << ' '; - } - Phrase outPhrase; - Incremental::ToPhrase(applied, outPhrase); - // delete 1st & last - UTIL_THROW_IF2(outPhrase.GetSize() < 2, - "Output phrase should have contained at least 2 words (beginning and end-of-sentence)"); - outPhrase.RemoveWord(0); - outPhrase.RemoveWord(outPhrase.GetSize() - 1); - out << outPhrase.GetStringRep(StaticData::Instance().GetOutputFactorOrder()); - out << '\n'; - m_singleBestOutputCollector->Write(translationId, out.str()); - - VERBOSE(1,"BEST TRANSLATION: " << outPhrase << "[total=" << applied.GetScore() << "]" << endl); -} - -void IOWrapper::OutputBestNone(long translationId) -{ - if (!m_singleBestOutputCollector) return; - if (StaticData::Instance().GetOutputHypoScore()) { - 
m_singleBestOutputCollector->Write(translationId, "0 \n"); - } else { - m_singleBestOutputCollector->Write(translationId, "\n"); - } -} - -void IOWrapper::Backtrack(const ChartHypothesis *hypo) -{ - const vector &prevHypos = hypo->GetPrevHypos(); - - vector::const_iterator iter; - for (iter = prevHypos.begin(); iter != prevHypos.end(); ++iter) { - const ChartHypothesis *prevHypo = *iter; - - VERBOSE(3,prevHypo->GetId() << " <= "); - Backtrack(prevHypo); - } -} - - void IOWrapper::OutputTranslationOptions(std::ostream &out, ApplicationContext &applicationContext, const ChartHypothesis *hypo, const Sentence &sentence, long translationId) { if (hypo != NULL) { diff --git a/moses/IOWrapper.h b/moses/IOWrapper.h index e0848f4331..a422c9b6d6 100644 --- a/moses/IOWrapper.h +++ b/moses/IOWrapper.h @@ -103,7 +103,6 @@ class IOWrapper // CHART typedef std::vector > ApplicationContext; - void Backtrack(const ChartHypothesis *hypo); void OutputTranslationOptions(std::ostream &out, ApplicationContext &applicationContext, const Moses::ChartHypothesis *hypo, const Moses::Sentence &sentence, long translationId); void OutputTranslationOptions(std::ostream &out, ApplicationContext &applicationContext, const search::Applied *applied, const Moses::Sentence &sentence, long translationId); void OutputTranslationOption(std::ostream &out, ApplicationContext &applicationContext, const Moses::ChartHypothesis *hypo, const Moses::Sentence &sentence, long translationId); @@ -171,10 +170,6 @@ class IOWrapper // CHART - void OutputBestHypo(const Moses::ChartHypothesis *hypo, long translationId); - void OutputBestHypo(search::Applied applied, long translationId); - - void OutputBestNone(long translationId); // phrase-based void OutputBestSurface(std::ostream &out, const Moses::Hypothesis *hypo, const std::vector &outputFactorOrder, char reportSegmentation, bool reportAllFactors); diff --git a/moses/Manager.cpp b/moses/Manager.cpp index f061ea640b..6fba8a1039 100644 --- a/moses/Manager.cpp +++ 
b/moses/Manager.cpp @@ -1450,6 +1450,11 @@ SentenceStats& Manager::GetSentenceStats() const } +void Manager::OutputBest(OutputCollector *collector) const +{ + +} + void Manager::OutputNBest(OutputCollector *collector) const { const StaticData &staticData = StaticData::Instance(); diff --git a/moses/Manager.h b/moses/Manager.h index 8e948c9c20..14dff1f047 100644 --- a/moses/Manager.h +++ b/moses/Manager.h @@ -187,6 +187,7 @@ class Manager : public BaseManager std::vector< const Hypothesis* >* pConnectedList, std::map < const Hypothesis*, std::set < const Hypothesis* > >* pOutgoingHyps, std::vector< float>* pFwdBwdScores) const; // outputs + void OutputBest(OutputCollector *collector) const; void OutputNBest(OutputCollector *collector) const; void OutputAlignment(OutputCollector *collector) const; void OutputLatticeSamples(OutputCollector *collector) const; diff --git a/moses/TranslationTask.cpp b/moses/TranslationTask.cpp index 2a4c9fd486..ce4cb93280 100644 --- a/moses/TranslationTask.cpp +++ b/moses/TranslationTask.cpp @@ -90,6 +90,8 @@ void TranslationTask::RunPb() additionalReportingTime.stop(); + manager.OutputBest(m_ioWrapper.GetSingleBestOutputCollector()); + // apply decision rule and output best translation(s) if (m_ioWrapper.GetSingleBestOutputCollector()) { ostringstream out; @@ -285,8 +287,8 @@ void TranslationTask::RunChart() manager.OutputSearchGraphHypergraph(); // 1-best - const ChartHypothesis *bestHypo = manager.GetBestHypothesis(); - m_ioWrapper.OutputBestHypo(bestHypo, translationId); + manager.OutputBest(m_ioWrapper.GetSingleBestOutputCollector()); + IFVERBOSE(2) { PrintUserTime("Best Hypothesis Generation Time:"); } From 1a0e329791bf8f9d8815a1353027be16fb2b15c3 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 10 Dec 2014 20:04:16 +0000 Subject: [PATCH 09/14] const some args. 
Ready to internalize n-best creation into Managers --- moses/LatticeMBR.cpp | 6 +++--- moses/LatticeMBR.h | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/moses/LatticeMBR.cpp b/moses/LatticeMBR.cpp index 9ea21d5dbc..b6fa147578 100644 --- a/moses/LatticeMBR.cpp +++ b/moses/LatticeMBR.cpp @@ -499,7 +499,7 @@ bool ascendingCoverageCmp(const Hypothesis* a, const Hypothesis* b) return a->GetWordsBitmap().GetNumWordsCovered() < b->GetWordsBitmap().GetNumWordsCovered(); } -void getLatticeMBRNBest(Manager& manager, TrellisPathList& nBestList, +void getLatticeMBRNBest(const Manager& manager, const TrellisPathList& nBestList, vector& solutions, size_t n) { const StaticData& staticData = StaticData::Instance(); @@ -546,7 +546,7 @@ void getLatticeMBRNBest(Manager& manager, TrellisPathList& nBestList, VERBOSE(2,"LMBR Score: " << solutions[0].GetScore() << endl); } -vector doLatticeMBR(Manager& manager, TrellisPathList& nBestList) +vector doLatticeMBR(const Manager& manager, const TrellisPathList& nBestList) { vector solutions; @@ -554,7 +554,7 @@ vector doLatticeMBR(Manager& manager, TrellisPathList& nBestList) return solutions.at(0).GetWords(); } -const TrellisPath doConsensusDecoding(Manager& manager, TrellisPathList& nBestList) +const TrellisPath doConsensusDecoding(const Manager& manager, const TrellisPathList& nBestList) { static const int BLEU_ORDER = 4; static const float SMOOTH = 1; diff --git a/moses/LatticeMBR.h b/moses/LatticeMBR.h index 47d6da3c48..5fa47949d4 100644 --- a/moses/LatticeMBR.h +++ b/moses/LatticeMBR.h @@ -137,15 +137,15 @@ void pruneLatticeFB(Lattice & connectedHyp, std::map < const Moses::Hypothesis*, const std::vector< float> & estimatedScores, const Moses::Hypothesis*, size_t edgeDensity,float scale); //Use the ngram scores to rerank the nbest list, return at most n solutions -void getLatticeMBRNBest(Moses::Manager& manager, Moses::TrellisPathList& nBestList, std::vector& solutions, size_t n); +void getLatticeMBRNBest(const 
Moses::Manager& manager, const Moses::TrellisPathList& nBestList, std::vector& solutions, size_t n); //calculate expectated ngram counts, clipping at 1 (ie calculating posteriors) if posteriors==true. void calcNgramExpectations(Lattice & connectedHyp, std::map >& incomingEdges, std::map& finalNgramScores, bool posteriors); void GetOutputFactors(const Moses::TrellisPath &path, std::vector &translation); void extract_ngrams(const std::vector& sentence, std::map < Moses::Phrase, int > & allngrams); bool ascendingCoverageCmp(const Moses::Hypothesis* a, const Moses::Hypothesis* b); -std::vector doLatticeMBR(Moses::Manager& manager, Moses::TrellisPathList& nBestList); -const Moses::TrellisPath doConsensusDecoding(Moses::Manager& manager, Moses::TrellisPathList& nBestList); +std::vector doLatticeMBR(const Moses::Manager& manager, const Moses::TrellisPathList& nBestList); +const Moses::TrellisPath doConsensusDecoding(const Moses::Manager& manager, const Moses::TrellisPathList& nBestList); //std::vector doConsensusDecoding(Moses::Manager& manager, Moses::TrellisPathList& nBestList); } From c48a3aadc11fd365bb74052401c93ddb8ae09177 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Thu, 11 Dec 2014 16:54:19 +0000 Subject: [PATCH 10/14] chmod --- scripts/share/nonbreaking_prefixes/nonbreaking_prefix.pl | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 scripts/share/nonbreaking_prefixes/nonbreaking_prefix.pl diff --git a/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.pl b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.pl old mode 100755 new mode 100644 From 8bbccd441a422a80d09eb5d916ee69d5d76b90bf Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Thu, 11 Dec 2014 23:51:30 -0500 Subject: [PATCH 11/14] Fix #85 by changing the default LM. Hieu said it's ok in the issue. 
--- scripts/training/train-model.perl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/training/train-model.perl b/scripts/training/train-model.perl index 7fe524165d..788e207232 100755 --- a/scripts/training/train-model.perl +++ b/scripts/training/train-model.perl @@ -2115,7 +2115,7 @@ sub create_ini { my $path = `pwd`; chop($path); $fn = $path."/".$fn; } - $type = "SRILM" unless defined $type; # default to SRILM if no type given + $type = "KENLM" unless defined $type; # default to KENLM if no type given if ($type =~ /^\d+$/) { # backwards compatibility if the type is given not as string but as a number From 10d41d025e257546fde4fa7ebee3128c30bc769b Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Sat, 13 Dec 2014 23:19:09 +0000 Subject: [PATCH 12/14] move collection of DistortionScoreProducer to its own class --- moses/BitmapContainer.cpp | 2 +- moses/FF/DistortionScoreProducer.cpp | 3 +++ moses/FF/DistortionScoreProducer.h | 7 +++++++ moses/FF/FeatureFunction.cpp | 5 ----- moses/FF/FeatureFunction.h | 5 ----- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/moses/BitmapContainer.cpp b/moses/BitmapContainer.cpp index 2585c2eee3..d7e27c2981 100644 --- a/moses/BitmapContainer.cpp +++ b/moses/BitmapContainer.cpp @@ -57,7 +57,7 @@ class HypothesisScoreOrdererWithDistortion m_totalWeightDistortion = 0; const StaticData &staticData = StaticData::Instance(); - const std::vector &ffs = FeatureFunction::GetDistortionFeatureFunctions(); + const std::vector &ffs = DistortionScoreProducer::GetDistortionFeatureFunctions(); std::vector::const_iterator iter; for (iter = ffs.begin(); iter != ffs.end(); ++iter) { const DistortionScoreProducer *ff = *iter; diff --git a/moses/FF/DistortionScoreProducer.cpp b/moses/FF/DistortionScoreProducer.cpp index 5995fe2130..e1571d2a93 100644 --- a/moses/FF/DistortionScoreProducer.cpp +++ b/moses/FF/DistortionScoreProducer.cpp @@ -22,9 +22,12 @@ struct DistortionState_traditional : public FFState { } };
+std::vector DistortionScoreProducer::s_staticColl; + DistortionScoreProducer::DistortionScoreProducer(const std::string &line) : StatefulFeatureFunction(1, line) { + s_staticColl.push_back(this); ReadParameters(); } diff --git a/moses/FF/DistortionScoreProducer.h b/moses/FF/DistortionScoreProducer.h index aa2c18b95d..2dc1338c60 100644 --- a/moses/FF/DistortionScoreProducer.h +++ b/moses/FF/DistortionScoreProducer.h @@ -16,7 +16,14 @@ class WordsRange; */ class DistortionScoreProducer : public StatefulFeatureFunction { +protected: + static std::vector s_staticColl; + public: + static const std::vector& GetDistortionFeatureFunctions() { + return s_staticColl; + } + DistortionScoreProducer(const std::string &line); bool IsUseable(const FactorMask &mask) const { diff --git a/moses/FF/FeatureFunction.cpp b/moses/FF/FeatureFunction.cpp index 8fba98315b..bd143d8095 100644 --- a/moses/FF/FeatureFunction.cpp +++ b/moses/FF/FeatureFunction.cpp @@ -17,7 +17,6 @@ namespace Moses multiset FeatureFunction::description_counts; std::vector FeatureFunction::s_staticColl; -std::vector FeatureFunction::s_staticCollDistortion; FeatureFunction &FeatureFunction::FindFeatureFunction(const std::string& name) { @@ -71,10 +70,6 @@ Initialize(const std::string &line) ScoreComponentCollection::RegisterScoreProducer(this); s_staticColl.push_back(this); - - const DistortionScoreProducer *distortion = dynamic_cast(this); - if(distortion) - s_staticCollDistortion.push_back (distortion); } FeatureFunction::~FeatureFunction() {} diff --git a/moses/FF/FeatureFunction.h b/moses/FF/FeatureFunction.h index 935a4140a1..9d02c261ee 100644 --- a/moses/FF/FeatureFunction.h +++ b/moses/FF/FeatureFunction.h @@ -31,7 +31,6 @@ class FeatureFunction protected: /**< all the score producers in this run */ static std::vector s_staticColl; - static std::vector s_staticCollDistortion; std::string m_description, m_argLine; std::vector > m_args; @@ -49,10 +48,6 @@ class FeatureFunction return s_staticColl; } - static 
const std::vector& GetDistortionFeatureFunctions() { - return s_staticCollDistortion; - } - static FeatureFunction &FindFeatureFunction(const std::string& name); static void Destroy(); From 7c159b3d5881ec18bc87a99ddc87a10bebf486c7 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Sun, 14 Dec 2014 13:19:38 +0000 Subject: [PATCH 13/14] rename ChartDecoding -> CYKPlus. Comment out CubeGrowing, not implemented --- moses/FF/BleuScoreFeature.cpp | 2 +- moses/FF/ConstrainedDecoding.cpp | 2 +- moses/Search.cpp | 2 -- moses/StaticData.h | 2 +- moses/TypeDef.h | 4 ++-- 5 files changed, 5 insertions(+), 7 deletions(-) diff --git a/moses/FF/BleuScoreFeature.cpp b/moses/FF/BleuScoreFeature.cpp index 0d0a20797a..ac1dacced9 100644 --- a/moses/FF/BleuScoreFeature.cpp +++ b/moses/FF/BleuScoreFeature.cpp @@ -118,7 +118,7 @@ void BleuScoreFeature::SetParameter(const std::string& key, const std::string& v } string line; while (getline(in,line)) { - /* if (GetSearchAlgorithm() == ChartDecoding) { + /* if (GetSearchAlgorithm() == CYKPlus) { stringstream tmp; tmp << " " << line << " "; line = tmp.str(); diff --git a/moses/FF/ConstrainedDecoding.cpp b/moses/FF/ConstrainedDecoding.cpp index bfe4129135..f5514b655c 100644 --- a/moses/FF/ConstrainedDecoding.cpp +++ b/moses/FF/ConstrainedDecoding.cpp @@ -43,7 +43,7 @@ ConstrainedDecoding::ConstrainedDecoding(const std::string &line) void ConstrainedDecoding::Load() { const StaticData &staticData = StaticData::Instance(); - bool addBeginEndWord = (staticData.GetSearchAlgorithm() == ChartDecoding) || (staticData.GetSearchAlgorithm() == ChartIncremental); + bool addBeginEndWord = (staticData.GetSearchAlgorithm() == CYKPlus) || (staticData.GetSearchAlgorithm() == ChartIncremental); for(size_t i = 0; i < m_paths.size(); ++i) { InputFileStream constraintFile(m_paths[i]); diff --git a/moses/Search.cpp b/moses/Search.cpp index 0300710212..f6b8dc53a4 100644 --- a/moses/Search.cpp +++ b/moses/Search.cpp @@ -25,8 +25,6 @@ Search 
*Search::CreateSearch(Manager& manager, const InputType &source, return new SearchNormal(manager,source, transOptColl); case CubePruning: return new SearchCubePruning(manager, source, transOptColl); - case CubeGrowing: - return NULL; case NormalBatch: return new SearchNormalBatch(manager, source, transOptColl); default: diff --git a/moses/StaticData.h b/moses/StaticData.h index 0473146079..7bafdb73a0 100644 --- a/moses/StaticData.h +++ b/moses/StaticData.h @@ -419,7 +419,7 @@ class StaticData return m_searchAlgorithm; } bool IsChart() const { - return m_searchAlgorithm == ChartDecoding || m_searchAlgorithm == ChartIncremental; + return m_searchAlgorithm == CYKPlus || m_searchAlgorithm == ChartIncremental; } const ScoreComponentCollection& GetAllWeights() const { diff --git a/moses/TypeDef.h b/moses/TypeDef.h index d7cf3b3676..d794b3b438 100644 --- a/moses/TypeDef.h +++ b/moses/TypeDef.h @@ -141,8 +141,8 @@ enum DictionaryFind { enum SearchAlgorithm { Normal = 0 ,CubePruning = 1 - ,CubeGrowing = 2 - ,ChartDecoding= 3 + //,CubeGrowing = 2 + ,CYKPlus = 3 ,NormalBatch = 4 ,ChartIncremental = 5 }; From dfd6cd2dd7fb2451b899c0ee84172d2e4e291f77 Mon Sep 17 00:00:00 2001 From: Nicola Bertoldi Date: Mon, 15 Dec 2014 12:25:10 +0100 Subject: [PATCH 14/14] changes to make Moses compliant with the most recent version (5.80.06) of IRSTLM --- moses/LM/IRST.cpp | 3 +++ moses/LM/IRST.h | 9 +++++++++ moses/LM/Jamfile | 2 +- 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/moses/LM/IRST.cpp b/moses/LM/IRST.cpp index 88a38d283c..19a5f2c823 100644 --- a/moses/LM/IRST.cpp +++ b/moses/LM/IRST.cpp @@ -26,6 +26,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include "n_gram.h" #include "lmContainer.h" +using namespace irstlm; + #include "IRST.h" #include "moses/TypeDef.h" #include "moses/Util.h" @@ -39,6 +41,7 @@ using namespace std; namespace Moses { + LanguageModelIRST::LanguageModelIRST(const std::string &line) 
:LanguageModelSingleFactor(line) ,m_lmtb_dub(0) diff --git a/moses/LM/IRST.h b/moses/LM/IRST.h index 9b895073ba..9d46fc759e 100644 --- a/moses/LM/IRST.h +++ b/moses/LM/IRST.h @@ -29,9 +29,18 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include "moses/Util.h" #include "SingleFactor.h" +//this is required because: +//- IRSTLM package uses the namespace irstlm +//- the compilation of "IRST.cpp" requires "using namespace irstlm", which is defined in any file of the IRSTLM package +// but conflicts with these forward declarations of class lmContainer +//- for files in moses/LM the IRSTLM include directory is set +// but not for the rest of files +#ifdef LM_IRST class lmContainer; // irst lm container for any lm type class ngram; class dictionary; +#endif + namespace Moses { diff --git a/moses/LM/Jamfile b/moses/LM/Jamfile index 3d68d161bc..bda1d26815 100644 --- a/moses/LM/Jamfile +++ b/moses/LM/Jamfile @@ -19,7 +19,7 @@ if $(with-irstlm) { dependencies += irst ; lmmacros += LM_IRST ; echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" ; - echo "!!! You are linking with the IRSTLM library; be sure the release is >= 5.70.02 !!!" ; + echo "!!! You are linking with the IRSTLM library; be sure the release is >= 5.80.06 !!!" ; echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" ; }