From 0ee4feecb5d4deae35ac217d82bd3c57722619e4 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Mon, 27 Apr 2015 22:41:47 +0400 Subject: [PATCH 1/5] add extra parser argument to ChartCellCollection to support FF::EvaluateGivenAllOtherTransOpts() --- contrib/other-builds/moses/moses.project | 5 +++-- moses/ChartCellCollection.cpp | 3 ++- moses/ChartCellCollection.h | 7 +++++-- moses/Incremental.cpp | 2 +- 4 files changed, 11 insertions(+), 6 deletions(-) diff --git a/contrib/other-builds/moses/moses.project b/contrib/other-builds/moses/moses.project index 55bf4e8f16..adebcdfb4c 100644 --- a/contrib/other-builds/moses/moses.project +++ b/contrib/other-builds/moses/moses.project @@ -778,6 +778,8 @@ + + @@ -796,6 +798,7 @@ + @@ -870,6 +873,4 @@ - - diff --git a/moses/ChartCellCollection.cpp b/moses/ChartCellCollection.cpp index 46392261d4..55c50a4498 100644 --- a/moses/ChartCellCollection.cpp +++ b/moses/ChartCellCollection.cpp @@ -22,6 +22,7 @@ #include "ChartCellCollection.h" #include "InputType.h" #include "WordsRange.h" +#include "ChartManager.h" namespace Moses { @@ -51,7 +52,7 @@ class CubeCellFactory \param manager reference back to the manager */ ChartCellCollection::ChartCellCollection(const InputType &input, ChartManager &manager) - :ChartCellCollectionBase(input, CubeCellFactory(manager)) {} + :ChartCellCollectionBase(input, CubeCellFactory(manager), manager.GetParser()) {} } // namespace diff --git a/moses/ChartCellCollection.h b/moses/ChartCellCollection.h index 1edeb44500..5fdc98f583 100644 --- a/moses/ChartCellCollection.h +++ b/moses/ChartCellCollection.h @@ -30,12 +30,15 @@ namespace Moses { class InputType; class ChartManager; +class ChartParser; class ChartCellCollectionBase { public: - template ChartCellCollectionBase(const InputType &input, const Factory &factory) : - m_cells(input.GetSize()) { + template ChartCellCollectionBase(const InputType &input, + const Factory &factory, + const ChartParser &parser) + :m_cells(input.GetSize()) { size_t size = 
input.GetSize(); for (size_t startPos = 0; startPos < size; ++startPos) { diff --git a/moses/Incremental.cpp b/moses/Incremental.cpp index 17632b5715..85eec5cfc0 100644 --- a/moses/Incremental.cpp +++ b/moses/Incremental.cpp @@ -205,7 +205,7 @@ struct ChartCellBaseFactory { Manager::Manager(ttasksptr const& ttask) : BaseManager(ttask) - , cells_(m_source, ChartCellBaseFactory()) + , cells_(m_source, ChartCellBaseFactory(), parser_) , parser_(ttask, cells_) , n_best_(search::NBestConfig(StaticData::Instance().GetNBestSize())) { } From e6951aa9f253b3f399c7b3c160c6bbee50d49f35 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Mon, 27 Apr 2015 23:01:14 +0400 Subject: [PATCH 2/5] codelite --- contrib/other-builds/util/util.project | 2 ++ 1 file changed, 2 insertions(+) diff --git a/contrib/other-builds/util/util.project b/contrib/other-builds/util/util.project index 573c78296f..1006ddb52e 100644 --- a/contrib/other-builds/util/util.project +++ b/contrib/other-builds/util/util.project @@ -30,6 +30,8 @@ + + From b7792b227a337c36d97d3c0979d11e6955ba368c Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 28 Apr 2015 12:29:58 +0400 Subject: [PATCH 3/5] script to convert arabic to bw, and vice versa --- scripts/other/buckwalter.perl | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100755 scripts/other/buckwalter.perl diff --git a/scripts/other/buckwalter.perl b/scripts/other/buckwalter.perl new file mode 100755 index 0000000000..62544e212f --- /dev/null +++ b/scripts/other/buckwalter.perl @@ -0,0 +1,33 @@ +#!/usr/bin/env perl + +use strict; +use warnings; +use Encode::Arabic::Buckwalter; +use Getopt::Long "GetOptions"; + +my $direction; +GetOptions('direction=i' => \$direction) + or exit(1); +# direction: 1=arabic->bw, 2=bw->arabic + +die("ERROR: need to set direction") unless defined($direction); + + + +while (my $line = ) { + chomp($line); + + my $lineOut; + if ($direction == 1) { + $lineOut = encode 'buckwalter', decode 'utf8', $line; + } 
+ elsif ($direction == 2) { + $lineOut = encode 'utf8', decode 'buckwalter', $line; + } + else { + die("Unknown direction: $direction"); + } + print "$lineOut\n"; + +} + From 8f9bf7ea386feb1aef5413730bd627a1161c5928 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 28 Apr 2015 15:03:59 +0400 Subject: [PATCH 4/5] add -config --- scripts/training/wrappers/madamira-wrapper.perl | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/scripts/training/wrappers/madamira-wrapper.perl b/scripts/training/wrappers/madamira-wrapper.perl index 9866e64211..5c1d0404f0 100755 --- a/scripts/training/wrappers/madamira-wrapper.perl +++ b/scripts/training/wrappers/madamira-wrapper.perl @@ -15,6 +15,7 @@ my $SCHEME = "D2"; my $KEEP_TMP = 0; my $MADA_DIR; +my $CONFIG; my $FACTORS_STR; my @FACTORS; @@ -24,9 +25,14 @@ "tmpdir=s" => \$TMPDIR, "keep-tmp" => \$KEEP_TMP, "mada-dir=s" => \$MADA_DIR, - "factors=s" => \$FACTORS_STR + "factors=s" => \$FACTORS_STR, + "config=s" => \$CONFIG ) or die("ERROR: unknown options"); +if (!defined($CONFIG)) { + $CONFIG = "$MADA_DIR/samples/sampleConfigFile.xml"; +} + $TMPDIR = abs_path($TMPDIR); print STDERR "TMPDIR=$TMPDIR \n"; @@ -65,7 +71,7 @@ $cmd = "$SPLIT_EXEC -l 10000 -a 7 -d $TMPDIR/input $TMPDIR/split/x"; `$cmd`; -$cmd = "cd $MADA_DIR && parallel --jobs 4 java -Xmx2500m -Xms2500m -XX:NewRatio=3 -jar $MADA_DIR/MADAMIRA.jar -rawinput {} -rawoutdir $TMPDIR/out -rawconfig $MADA_DIR/samples/sampleConfigFile.xml ::: $TMPDIR/split/x*"; +$cmd = "cd $MADA_DIR && parallel --jobs 4 java -Xmx2500m -Xms2500m -XX:NewRatio=3 -jar $MADA_DIR/MADAMIRA.jar -rawinput {} -rawoutdir $TMPDIR/out -rawconfig $CONFIG ::: $TMPDIR/split/x*"; print STDERR "Executing: $cmd\n"; `$cmd`; @@ -77,7 +83,7 @@ open(MADA_OUT,"<$infile.mada"); #binmode(MADA_OUT, ":utf8"); while(my $line = <MADA_OUT>) { - chop($line); + chomp($line); #print STDERR "line=$line \n"; if (index($line, "SENTENCE BREAK") == 0) { From 616b589da31cc0c78ae25c04f045877dd7ff7224 Mon Sep 17 00:00:00
2001 From: Jeroen Vermeulen Date: Wed, 29 Apr 2015 21:18:51 +0700 Subject: [PATCH 5/5] Fix a bunch of compiler warnings. Warnings are useful, but only if there are few! --- moses/FF/BleuScoreFeature.cpp | 1 + moses/FF/OSM-Feature/osmHyp.cpp | 11 +++++------ moses/FF/SpanLength.cpp | 3 +-- moses/FF/SyntaxRHS.cpp | 4 ---- moses/LM/BilingualLM.cpp | 2 +- .../fuzzy-match/FuzzyMatchWrapper.cpp | 4 ++-- moses/TranslationTask.cpp | 1 - .../extract-mixed-syntax/ConsistentPhrases.cpp | 16 ++++++++-------- 8 files changed, 18 insertions(+), 24 deletions(-) diff --git a/moses/FF/BleuScoreFeature.cpp b/moses/FF/BleuScoreFeature.cpp index 24887c3737..a989643867 100644 --- a/moses/FF/BleuScoreFeature.cpp +++ b/moses/FF/BleuScoreFeature.cpp @@ -880,6 +880,7 @@ const FFState* BleuScoreFeature::EmptyHypothesisState(const InputType& input) co bool BleuScoreFeature::IsUseable(const FactorMask &mask) const { + // TODO: Was this meant to return mask[0]!? bool ret = mask[0]; return 0; } diff --git a/moses/FF/OSM-Feature/osmHyp.cpp b/moses/FF/OSM-Feature/osmHyp.cpp index 422b7c933b..f971bbe8cb 100644 --- a/moses/FF/OSM-Feature/osmHyp.cpp +++ b/moses/FF/OSM-Feature/osmHyp.cpp @@ -128,7 +128,7 @@ void osmHypothesis :: calculateOSMProb(OSMLM& ptrOp) State currState = lmState; State temp; - for (int i = 0; i & eSide , set & fSide , map < for (iter = eSide.begin(); iter != eSide.end(); iter++) { t = tS[*iter]; - for (int i = 0; i < t.size(); i++) { + for (size_t i = 0; i < t.size(); i++) { fSide.insert(t[i]); } @@ -472,7 +471,7 @@ void osmHypothesis :: getMeCepts ( set & eSide , set & fSide , map < t = sT[*iter]; - for (int i = 0 ; i & align , int startIndex , int int tgt; - for (int i = 0; i < align.size(); i+=2) { + for (size_t i = 0; i < align.size(); i+=2) { src = align[i]; tgt = align[i+1]; tS[tgt].push_back(src); diff --git a/moses/FF/SpanLength.cpp b/moses/FF/SpanLength.cpp index 0e14069ee1..91ac3ff896 100644 --- a/moses/FF/SpanLength.cpp +++ b/moses/FF/SpanLength.cpp @@ -45,8 +45,7 @@ 
void SpanLength::EvaluateWithSourceContext(const InputType &input const SpanLengthPhraseProperty *slProp = static_cast(property); - const Phrase *ruleSource = targetPhrase.GetRuleSource(); - assert(ruleSource); + assert(targetPhrase.GetRuleSource()); float score = 0; for (size_t i = 0; i < stackVec->size(); ++i) { diff --git a/moses/FF/SyntaxRHS.cpp b/moses/FF/SyntaxRHS.cpp index 80f9b21bce..a064778553 100644 --- a/moses/FF/SyntaxRHS.cpp +++ b/moses/FF/SyntaxRHS.cpp @@ -29,10 +29,6 @@ void SyntaxRHS::EvaluateWithSourceContext(const InputType &input , ScoreComponentCollection *estimatedFutureScore) const { assert(stackVec); - for (size_t i = 0; i < stackVec->size(); ++i) { - const ChartCellLabel &cell = *stackVec->at(i); - - } if (targetPhrase.GetNumNonTerminals()) { vector newScores(m_numScoreComponents); diff --git a/moses/LM/BilingualLM.cpp b/moses/LM/BilingualLM.cpp index fb59696d4d..d881c46161 100644 --- a/moses/LM/BilingualLM.cpp +++ b/moses/LM/BilingualLM.cpp @@ -106,7 +106,7 @@ size_t BilingualLM::selectMiddleAlignment( { set::iterator it = alignment_links.begin(); - for (int i = 0; i < (alignment_links.size() - 1) / 2; ++i) { + for (size_t i = 0; i < (alignment_links.size() - 1) / 2; ++i) { ++it; } diff --git a/moses/TranslationModel/fuzzy-match/FuzzyMatchWrapper.cpp b/moses/TranslationModel/fuzzy-match/FuzzyMatchWrapper.cpp index 26dce03d02..8cc2e3f57e 100644 --- a/moses/TranslationModel/fuzzy-match/FuzzyMatchWrapper.cpp +++ b/moses/TranslationModel/fuzzy-match/FuzzyMatchWrapper.cpp @@ -716,7 +716,7 @@ void FuzzyMatchWrapper::basic_fuzzy_match( vector< vector< WORD_ID > > source, } unsigned int best_cost = input_length * (100-min_match) / 100 + 2; string best_path = ""; - int best_match = -1; + //int best_match = -1; // go through all corpus sentences for(unsigned int s=0; s > source, if (cost < best_cost) { best_cost = cost; best_path = path; - best_match = s; + //best_match = s; } } //cout << best_cost << " ||| " << best_match << " ||| " << best_path << 
endl; diff --git a/moses/TranslationTask.cpp b/moses/TranslationTask.cpp index b8ad559780..acb84c434d 100644 --- a/moses/TranslationTask.cpp +++ b/moses/TranslationTask.cpp @@ -131,7 +131,6 @@ void TranslationTask::Run() // shorthand for "global data" - const StaticData &staticData = StaticData::Instance(); const size_t translationId = m_source->GetTranslationId(); // report wall time spent on translation diff --git a/phrase-extract/extract-mixed-syntax/ConsistentPhrases.cpp b/phrase-extract/extract-mixed-syntax/ConsistentPhrases.cpp index f03a61840a..b1d64fc546 100644 --- a/phrase-extract/extract-mixed-syntax/ConsistentPhrases.cpp +++ b/phrase-extract/extract-mixed-syntax/ConsistentPhrases.cpp @@ -19,10 +19,10 @@ ConsistentPhrases::ConsistentPhrases() ConsistentPhrases::~ConsistentPhrases() { - for (int start = 0; start < m_coll.size(); ++start) { + for (size_t start = 0; start < m_coll.size(); ++start) { std::vector &allSourceStart = m_coll[start]; - for (int size = 0; size < allSourceStart.size(); ++size) { + for (size_t size = 0; size < allSourceStart.size(); ++size) { Coll &coll = allSourceStart[size]; Moses::RemoveAllInColl(coll); } @@ -48,8 +48,8 @@ void ConsistentPhrases::Add(int sourceStart, int sourceEnd, targetStart, targetEnd, params); - pair inserted = coll.insert(cp); - assert(inserted.second); + assert(coll.find(cp) == coll.end()); + coll.insert(cp); } const ConsistentPhrases::Coll &ConsistentPhrases::GetColl(int sourceStart, int sourceEnd) const @@ -69,10 +69,10 @@ ConsistentPhrases::Coll &ConsistentPhrases::GetColl(int sourceStart, int sourceE std::string ConsistentPhrases::Debug() const { std::stringstream out; - for (int start = 0; start < m_coll.size(); ++start) { + for (size_t start = 0; start < m_coll.size(); ++start) { const std::vector &allSourceStart = m_coll[start]; - for (int size = 0; size < allSourceStart.size(); ++size) { + for (size_t size = 0; size < allSourceStart.size(); ++size) { const Coll &coll = allSourceStart[size]; 
Coll::const_iterator iter; @@ -89,9 +89,9 @@ std::string ConsistentPhrases::Debug() const void ConsistentPhrases::AddHieroNonTerms(const Parameter &params) { // add [X] labels everywhere - for (int i = 0; i < m_coll.size(); ++i) { + for (size_t i = 0; i < m_coll.size(); ++i) { vector<Coll> &inner = m_coll[i]; - for (int j = 0; j < inner.size(); ++j) { + for (size_t j = 0; j < inner.size(); ++j) { ConsistentPhrases::Coll &coll = inner[j]; ConsistentPhrases::Coll::iterator iter; for (iter = coll.begin(); iter != coll.end(); ++iter) {