Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
ugermann committed Apr 29, 2015
2 parents e4f5c69 + 616b589 commit 324b1a9
Show file tree
Hide file tree
Showing 15 changed files with 73 additions and 33 deletions.
5 changes: 3 additions & 2 deletions contrib/other-builds/moses/moses.project
Expand Up @@ -778,6 +778,8 @@
<File Name="../../../moses/PP/SpanLengthPhraseProperty.h"/>
<File Name="../../../moses/PP/TreeStructurePhraseProperty.h"/>
</VirtualDirectory>
<Dependencies Name="Debug"/>
<Dependencies Name="Release"/>
<Settings Type="Static Library">
<GlobalSettings>
<Compiler Options="" C_Options="" Assembler="">
Expand All @@ -796,6 +798,7 @@
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/boost/include"/>
<Preprocessor Value="MAX_NUM_FACTORS=4"/>
<Preprocessor Value="KENLM_MAX_ORDER=7"/>
<Preprocessor Value="WITH_THREADS"/>
</Compiler>
<Linker Options="" Required="yes"/>
<ResourceCompiler Options="" Required="no"/>
Expand Down Expand Up @@ -870,6 +873,4 @@
</Completion>
</Configuration>
</Settings>
<Dependencies Name="Debug"/>
<Dependencies Name="Release"/>
</CodeLite_Project>
2 changes: 2 additions & 0 deletions contrib/other-builds/util/util.project
Expand Up @@ -30,6 +30,8 @@
<File Name="../../../util/string_piece.cc"/>
<File Name="../../../util/tokenize_piece_test.cc" ExcludeProjConfig="Debug"/>
<File Name="../../../util/usage.cc"/>
<File Name="../../../util/random.cc"/>
<File Name="../../../util/random.hh"/>
</VirtualDirectory>
<VirtualDirectory Name="double-conversion">
<File Name="../../../util/double-conversion/bignum-dtoa.cc"/>
Expand Down
3 changes: 2 additions & 1 deletion moses/ChartCellCollection.cpp
Expand Up @@ -22,6 +22,7 @@
#include "ChartCellCollection.h"
#include "InputType.h"
#include "WordsRange.h"
#include "ChartManager.h"

namespace Moses
{
Expand Down Expand Up @@ -51,7 +52,7 @@ class CubeCellFactory
\param manager reference back to the manager
*/
ChartCellCollection::ChartCellCollection(const InputType &input, ChartManager &manager)
:ChartCellCollectionBase(input, CubeCellFactory(manager)) {}
:ChartCellCollectionBase(input, CubeCellFactory(manager), manager.GetParser()) {}

} // namespace

7 changes: 5 additions & 2 deletions moses/ChartCellCollection.h
Expand Up @@ -30,12 +30,15 @@ namespace Moses
{
class InputType;
class ChartManager;
class ChartParser;

class ChartCellCollectionBase
{
public:
template <class Factory> ChartCellCollectionBase(const InputType &input, const Factory &factory) :
m_cells(input.GetSize()) {
template <class Factory> ChartCellCollectionBase(const InputType &input,
const Factory &factory,
const ChartParser &parser)
:m_cells(input.GetSize()) {

size_t size = input.GetSize();
for (size_t startPos = 0; startPos < size; ++startPos) {
Expand Down
1 change: 1 addition & 0 deletions moses/FF/BleuScoreFeature.cpp
Expand Up @@ -880,6 +880,7 @@ const FFState* BleuScoreFeature::EmptyHypothesisState(const InputType& input) co

/**
 * Report whether this feature can be used with the given factor mask.
 *
 * The current implementation ignores \p mask and always answers false.
 *
 * \param mask factor mask offered by the caller (currently not consulted)
 * \return always false
 */
bool BleuScoreFeature::IsUseable(const FactorMask &mask) const
{
  // TODO: Was this meant to return mask[0]!?
  // Historically mask[0] was read into a local that was never used and the
  // function returned 0. That behaviour is kept as-is (minus the dead local)
  // until the question above is settled, since returning mask[0] would change
  // the result seen by callers.
  static_cast<void>(mask); // parameter intentionally unused for now
  return false;
}
Expand Down
11 changes: 5 additions & 6 deletions moses/FF/OSM-Feature/osmHyp.cpp
Expand Up @@ -128,7 +128,7 @@ void osmHypothesis :: calculateOSMProb(OSMLM& ptrOp)
State currState = lmState;
State temp;

for (int i = 0; i<operations.size(); i++) {
for (size_t i = 0; i<operations.size(); i++) {
temp = currState;
opProb += ptrOp.Score(temp,operations[i],currState);
}
Expand Down Expand Up @@ -368,7 +368,6 @@ void osmHypothesis :: computeOSMFeature(int startIndex , WordsBitmap & coverageV
string english;
string source;
int j1;
int start = 0;
int targetIndex = 0;
doneTargetIndexes.clear();

Expand All @@ -391,7 +390,7 @@ void osmHypothesis :: computeOSMFeature(int startIndex , WordsBitmap & coverageV
}


for (int i = 0; i < ceptsInPhrase.size(); i++) {
for (size_t i = 0; i < ceptsInPhrase.size(); i++) {
source = "";
english = "";

Expand Down Expand Up @@ -462,7 +461,7 @@ void osmHypothesis :: getMeCepts ( set <int> & eSide , set <int> & fSide , map <
for (iter = eSide.begin(); iter != eSide.end(); iter++) {
t = tS[*iter];

for (int i = 0; i < t.size(); i++) {
for (size_t i = 0; i < t.size(); i++) {
fSide.insert(t[i]);
}

Expand All @@ -472,7 +471,7 @@ void osmHypothesis :: getMeCepts ( set <int> & eSide , set <int> & fSide , map <

t = sT[*iter];

for (int i = 0 ; i<t.size(); i++) {
for (size_t i = 0 ; i<t.size(); i++) {
eSide.insert(t[i]);
}

Expand All @@ -498,7 +497,7 @@ void osmHypothesis :: constructCepts(vector <int> & align , int startIndex , int
int tgt;


for (int i = 0; i < align.size(); i+=2) {
for (size_t i = 0; i < align.size(); i+=2) {
src = align[i];
tgt = align[i+1];
tS[tgt].push_back(src);
Expand Down
3 changes: 1 addition & 2 deletions moses/FF/SpanLength.cpp
Expand Up @@ -45,8 +45,7 @@ void SpanLength::EvaluateWithSourceContext(const InputType &input

const SpanLengthPhraseProperty *slProp = static_cast<const SpanLengthPhraseProperty*>(property);

const Phrase *ruleSource = targetPhrase.GetRuleSource();
assert(ruleSource);
assert(targetPhrase.GetRuleSource());

float score = 0;
for (size_t i = 0; i < stackVec->size(); ++i) {
Expand Down
4 changes: 0 additions & 4 deletions moses/FF/SyntaxRHS.cpp
Expand Up @@ -29,10 +29,6 @@ void SyntaxRHS::EvaluateWithSourceContext(const InputType &input
, ScoreComponentCollection *estimatedFutureScore) const
{
assert(stackVec);
for (size_t i = 0; i < stackVec->size(); ++i) {
const ChartCellLabel &cell = *stackVec->at(i);

}

if (targetPhrase.GetNumNonTerminals()) {
vector<float> newScores(m_numScoreComponents);
Expand Down
2 changes: 1 addition & 1 deletion moses/Incremental.cpp
Expand Up @@ -205,7 +205,7 @@ struct ChartCellBaseFactory {

Manager::Manager(ttasksptr const& ttask)
: BaseManager(ttask)
, cells_(m_source, ChartCellBaseFactory())
, cells_(m_source, ChartCellBaseFactory(), parser_)
, parser_(ttask, cells_)
, n_best_(search::NBestConfig(StaticData::Instance().GetNBestSize()))
{ }
Expand Down
2 changes: 1 addition & 1 deletion moses/LM/BilingualLM.cpp
Expand Up @@ -106,7 +106,7 @@ size_t BilingualLM::selectMiddleAlignment(
{

set<size_t>::iterator it = alignment_links.begin();
for (int i = 0; i < (alignment_links.size() - 1) / 2; ++i) {
for (size_t i = 0; i < (alignment_links.size() - 1) / 2; ++i) {
++it;
}

Expand Down
4 changes: 2 additions & 2 deletions moses/TranslationModel/fuzzy-match/FuzzyMatchWrapper.cpp
Expand Up @@ -716,7 +716,7 @@ void FuzzyMatchWrapper::basic_fuzzy_match( vector< vector< WORD_ID > > source,
}
unsigned int best_cost = input_length * (100-min_match) / 100 + 2;
string best_path = "";
int best_match = -1;
//int best_match = -1;

// go through all corpus sentences
for(unsigned int s=0; s<source.size(); s++) {
Expand All @@ -739,7 +739,7 @@ void FuzzyMatchWrapper::basic_fuzzy_match( vector< vector< WORD_ID > > source,
if (cost < best_cost) {
best_cost = cost;
best_path = path;
best_match = s;
//best_match = s;
}
}
//cout << best_cost << " ||| " << best_match << " ||| " << best_path << endl;
Expand Down
1 change: 0 additions & 1 deletion moses/TranslationTask.cpp
Expand Up @@ -131,7 +131,6 @@ void TranslationTask::Run()


// shorthand for "global data"
const StaticData &staticData = StaticData::Instance();
const size_t translationId = m_source->GetTranslationId();

// report wall time spent on translation
Expand Down
16 changes: 8 additions & 8 deletions phrase-extract/extract-mixed-syntax/ConsistentPhrases.cpp
Expand Up @@ -19,10 +19,10 @@ ConsistentPhrases::ConsistentPhrases()

ConsistentPhrases::~ConsistentPhrases()
{
for (int start = 0; start < m_coll.size(); ++start) {
for (size_t start = 0; start < m_coll.size(); ++start) {
std::vector<Coll> &allSourceStart = m_coll[start];

for (int size = 0; size < allSourceStart.size(); ++size) {
for (size_t size = 0; size < allSourceStart.size(); ++size) {
Coll &coll = allSourceStart[size];
Moses::RemoveAllInColl(coll);
}
Expand All @@ -48,8 +48,8 @@ void ConsistentPhrases::Add(int sourceStart, int sourceEnd,
targetStart, targetEnd,
params);

pair<Coll::iterator, bool> inserted = coll.insert(cp);
assert(inserted.second);
assert(coll.find(cp) == coll.end());
coll.insert(cp);
}

const ConsistentPhrases::Coll &ConsistentPhrases::GetColl(int sourceStart, int sourceEnd) const
Expand All @@ -69,10 +69,10 @@ ConsistentPhrases::Coll &ConsistentPhrases::GetColl(int sourceStart, int sourceE
std::string ConsistentPhrases::Debug() const
{
std::stringstream out;
for (int start = 0; start < m_coll.size(); ++start) {
for (size_t start = 0; start < m_coll.size(); ++start) {
const std::vector<Coll> &allSourceStart = m_coll[start];

for (int size = 0; size < allSourceStart.size(); ++size) {
for (size_t size = 0; size < allSourceStart.size(); ++size) {
const Coll &coll = allSourceStart[size];

Coll::const_iterator iter;
Expand All @@ -89,9 +89,9 @@ std::string ConsistentPhrases::Debug() const
void ConsistentPhrases::AddHieroNonTerms(const Parameter &params)
{
// add [X] labels everywhere
for (int i = 0; i < m_coll.size(); ++i) {
for (size_t i = 0; i < m_coll.size(); ++i) {
vector<Coll> &inner = m_coll[i];
for (int j = 0; j < inner.size(); ++j) {
for (size_t j = 0; j < inner.size(); ++j) {
ConsistentPhrases::Coll &coll = inner[j];
ConsistentPhrases::Coll::iterator iter;
for (iter = coll.begin(); iter != coll.end(); ++iter) {
Expand Down
33 changes: 33 additions & 0 deletions scripts/other/buckwalter.perl
@@ -0,0 +1,33 @@
#!/usr/bin/env perl

use strict;
use warnings;
use Encode::Arabic::Buckwalter;
use Getopt::Long "GetOptions";

# Transliterate STDIN line by line between UTF-8 Arabic text and its
# Buckwalter transliteration, writing one output line per input line.
#
# Usage: buckwalter.perl --direction N < input > output
#   direction: 1=arabic->bw, 2=bw->arabic
my $direction;
GetOptions('direction=i' => \$direction)
  or exit(1);

die("ERROR: need to set direction") unless defined($direction);

# Reject an unsupported direction before reading any input, so the error is
# reported even when STDIN is empty and the value is not re-tested per line.
die("Unknown direction: $direction")
  unless $direction == 1 || $direction == 2;

# Choose the source and target encodings once, up front.
my ($from, $to) = ($direction == 1)
  ? ('utf8', 'buckwalter')
  : ('buckwalter', 'utf8');

while (my $line = <STDIN>) {
  chomp($line);

  # Decode the raw input bytes from the source encoding, then re-encode them
  # in the target encoding.
  my $lineOut = encode($to, decode($from, $line));
  print "$lineOut\n";
}

12 changes: 9 additions & 3 deletions scripts/training/wrappers/madamira-wrapper.perl
Expand Up @@ -15,6 +15,7 @@
my $SCHEME = "D2";
my $KEEP_TMP = 0;
my $MADA_DIR;
my $CONFIG;

my $FACTORS_STR;
my @FACTORS;
Expand All @@ -24,9 +25,14 @@
"tmpdir=s" => \$TMPDIR,
"keep-tmp" => \$KEEP_TMP,
"mada-dir=s" => \$MADA_DIR,
"factors=s" => \$FACTORS_STR
"factors=s" => \$FACTORS_STR,
"config=s" => \$CONFIG
) or die("ERROR: unknown options");

if (!defined($CONFIG)) {
$CONFIG = "$MADA_DIR/samples/sampleConfigFile.xml";
}

$TMPDIR = abs_path($TMPDIR);
print STDERR "TMPDIR=$TMPDIR \n";

Expand Down Expand Up @@ -65,7 +71,7 @@
$cmd = "$SPLIT_EXEC -l 10000 -a 7 -d $TMPDIR/input $TMPDIR/split/x";
`$cmd`;

$cmd = "cd $MADA_DIR && parallel --jobs 4 java -Xmx2500m -Xms2500m -XX:NewRatio=3 -jar $MADA_DIR/MADAMIRA.jar -rawinput {} -rawoutdir $TMPDIR/out -rawconfig $MADA_DIR/samples/sampleConfigFile.xml ::: $TMPDIR/split/x*";
$cmd = "cd $MADA_DIR && parallel --jobs 4 java -Xmx2500m -Xms2500m -XX:NewRatio=3 -jar $MADA_DIR/MADAMIRA.jar -rawinput {} -rawoutdir $TMPDIR/out -rawconfig $CONFIG ::: $TMPDIR/split/x*";
print STDERR "Executing: $cmd\n";
`$cmd`;

Expand All @@ -77,7 +83,7 @@
open(MADA_OUT,"<$infile.mada");
#binmode(MADA_OUT, ":utf8");
while(my $line = <MADA_OUT>) {
chop($line);
chomp($line);
#print STDERR "line=$line \n";

if (index($line, "SENTENCE BREAK") == 0) {
Expand Down

0 comments on commit 324b1a9

Please sign in to comment.