Skip to content

Commit

Permalink
move doc for suffix array
Browse files Browse the repository at this point in the history
  • Loading branch information
Hieu Hoang committed Jul 29, 2014
2 parents d5989af + f9d1673 commit 24d740d
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 27 deletions.
1 change: 1 addition & 0 deletions doc/Mmsapt.howto
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
The documentation for memory-mapped, dynamic suffix arrays has moved to
http://www.statmt.org/moses/?n=Moses.AdvancedFeatures#ntoc40

1 change: 1 addition & 0 deletions moses/FF/Factory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,7 @@ FeatureRegistry::FeatureRegistry()
#endif
#ifdef PT_UG
MOSES_FNAME(Mmsapt);
MOSES_FNAME2("PhraseDictionaryBitextSampling",Mmsapt); // that's an alias for Mmsapt!
#endif
#ifdef HAVE_PROBINGPT
MOSES_FNAME(ProbingPT);
Expand Down
8 changes: 4 additions & 4 deletions moses/TranslationModel/DynSAInclude/FileHandler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,13 +71,13 @@ bool FileHandler::setStreamBuffer(bool checkExists)
{
// redirect stdin or stdout if necessary
if (path_ == FileHandler::kStdInDescriptor) {
UTIL_THROW_IF2(flags_ & std::ios::in == 0,
"Incorrect flags: " << flags_);
UTIL_THROW_IF2((flags_ & std::ios::in) == 0,
"Incorrect flags: " << flags_);
std::streambuf* sb = std::cin.rdbuf();
buffer_ = sb;
} else if (path_ == FileHandler::kStdOutDescriptor) {
UTIL_THROW_IF2(flags_ & std::ios::out == 0,
"Incorrect flags: " << flags_);
UTIL_THROW_IF2((flags_ & std::ios::out) == 0,
"Incorrect flags: " << flags_);
std::streambuf* sb = std::cout.rdbuf();
buffer_ = sb;
} else {
Expand Down
23 changes: 18 additions & 5 deletions moses/TranslationModel/UG/mmsapt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,10 +157,20 @@ namespace Moses
if (m != param.end())
read_config_file(m->second,param);

bname = param["base"];
m = param.find("base");
if (m != param.end())
{
bname = m->second;
m = param.find("path");
UTIL_THROW_IF2((m != param.end() && m->second != bname),
"Conflicting aliases for path:\n"
<< "path=" << string(m->second) << "\n"
<< "base=" << bname.c_str() );
}
else bname = param["path"];
L1 = param["L1"];
L2 = param["L2"];

UTIL_THROW_IF2(bname.size() == 0, "Missing corpus base name at " << HERE);
UTIL_THROW_IF2(L1.size() == 0, "Missing L1 tag at " << HERE);
UTIL_THROW_IF2(L2.size() == 0, "Missing L2 tag at " << HERE);
Expand All @@ -183,7 +193,8 @@ namespace Moses
m_workers = atoi(param.insert(dflt).first->second.c_str());
m_workers = min(m_workers,24UL);

dflt = pair<string,string>("limit","20");

dflt = pair<string,string>("table-limit","20");
m_tableLimit = atoi(param.insert(dflt).first->second.c_str());

dflt = pair<string,string>("cache","10000");
Expand Down Expand Up @@ -211,24 +222,26 @@ namespace Moses
known_parameters.push_back("L1");
known_parameters.push_back("L2");
known_parameters.push_back("Mmsapt");
known_parameters.push_back("base");
known_parameters.push_back("base"); // alias for path
known_parameters.push_back("cache");
known_parameters.push_back("coh");
known_parameters.push_back("config");
known_parameters.push_back("extra");
known_parameters.push_back("input-factor");
known_parameters.push_back("lexalpha");
known_parameters.push_back("limit");
// known_parameters.push_back("limit"); // replaced by "table-limit"
known_parameters.push_back("logcnt");
known_parameters.push_back("name");
known_parameters.push_back("num-features");
known_parameters.push_back("output-factor");
known_parameters.push_back("path");
known_parameters.push_back("pbwd");
known_parameters.push_back("pfwd");
known_parameters.push_back("prov");
known_parameters.push_back("rare");
known_parameters.push_back("sample");
known_parameters.push_back("smooth");
known_parameters.push_back("table-limit");
known_parameters.push_back("unal");
known_parameters.push_back("workers");
for (map<string,string>::iterator m = param.begin(); m != param.end(); ++m)
Expand Down
36 changes: 18 additions & 18 deletions moses/TranslationModel/fuzzy-match/create_xml.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ void create_xml(const string &inPath)
ofstream rule((inPath + ".extract").c_str());
ofstream ruleInv((inPath + ".extract.inv").c_str());

int setenceId;
float score;
// int setenceId;
// float score;
string source, target, align, path;
string *input = NULL;
int count;
Expand All @@ -47,11 +47,11 @@ void create_xml(const string &inPath)
//cout << inLine << endl;
switch (step) {
case 0:
setenceId = Scan<int>(inLine);
/*setenceId = */ Scan<int>(inLine);
++step;
break;
case 1:
score = Scan<float>(inLine);
/*score = */ Scan<float>(inLine);
++step;
break;
case 2:
Expand Down Expand Up @@ -124,7 +124,7 @@ CreateXMLRetValues createXML(int ruleCount, const string &source, const string &
int start_s = 0, start_i = 0;

//cerr << input << endl << source << endl << target << endl << path << endl;
for ( int p = 0 ; p < path.length() ; p++ ) {
for ( int p = 0 ; p < int(path.length()) ; p++ ) {
string action = path.substr(p, 1);

// beginning of a mismatch
Expand Down Expand Up @@ -176,7 +176,7 @@ CreateXMLRetValues createXML(int ruleCount, const string &source, const string &
}

// end of sentence? add to end
if ( start_t == 1000 && i > inputToks.size() - 1 ) {
if ( start_t == 1000 && i > int(inputToks.size()) - 1 ) {
start_t = targetsToks.size() - 1;
}

Expand Down Expand Up @@ -216,13 +216,13 @@ CreateXMLRetValues createXML(int ruleCount, const string &source, const string &
if ( action != "I" ) {
//cerr << " ->";

if (s < alignments.m_alignS2T.size()) {
if (s < int(alignments.m_alignS2T.size())) {
const std::map<int, int> &targets = alignments.m_alignS2T[s];
//cerr << "s=" << s << endl;

std::map<int, int>::const_iterator iter;
for (iter = targets.begin(); iter != targets.end(); ++iter) {
int tt = iter->first;
// int tt = iter->first;
//cerr << " " << tt;
}
}
Expand All @@ -245,7 +245,7 @@ CreateXMLRetValues createXML(int ruleCount, const string &source, const string &
} // for ( int p = 0

//cerr << target << endl;
for (int i = 0; i < targetBitmap.size(); ++i) {
for (size_t i = 0; i < targetBitmap.size(); ++i) {
//cerr << targetBitmap[i];
}
//cerr << endl;
Expand All @@ -260,13 +260,13 @@ CreateXMLRetValues createXML(int ruleCount, const string &source, const string &
int rule_pos_s = 0;
map<int, int> ruleAlignS;

for (int i = 0 ; i < inputBitmap.size() ; ++i ) {
for (int i = 0 ; i < int(inputBitmap.size()) ; ++i ) {
if ( inputBitmap[i] ) {
ret.ruleS += inputToks[i] + " ";
ruleAlignS[ alignI2S[i] ] = rule_pos_s++;
}

for (int j = 0; j < nonTerms.size(); ++j) {
for (size_t j = 0; j < nonTerms.size(); ++j) {
map<string, int> &nt = nonTerms[j];
if (i == nt["start_i"]) {
ret.ruleS += "[X][X] ";
Expand All @@ -284,7 +284,7 @@ CreateXMLRetValues createXML(int ruleCount, const string &source, const string &
ruleAlignT[t] = rule_pos_t++;
}

for (int i = 0; i < nonTerms.size(); ++i) {
for (size_t i = 0; i < nonTerms.size(); ++i) {
map<string, int> &nt = nonTerms[i];

if (t == nt["start_t"]) {
Expand All @@ -300,7 +300,7 @@ CreateXMLRetValues createXML(int ruleCount, const string &source, const string &
for (map<int, int>::const_iterator iter = ruleAlignS.begin(); iter != ruleAlignS.end(); ++iter) {
int s = iter->first;

if (s < alignments.m_alignS2T.size()) {
if (s < int(alignments.m_alignS2T.size())) {
const std::map<int, int> &targets = alignments.m_alignS2T[s];

std::map<int, int>::const_iterator iter;
Expand All @@ -316,7 +316,7 @@ CreateXMLRetValues createXML(int ruleCount, const string &source, const string &

//cerr << "numAlign=" << numAlign << endl;

for (int i = 0; i < nonTerms.size(); ++i) {
for (size_t i = 0; i < nonTerms.size(); ++i) {
map<string, int> &nt = nonTerms[i];
ret.ruleAlignment += SPrint(nt["rule_pos_s"]) + "-" + SPrint(nt["rule_pos_t"]) + " ";
++numAlign;
Expand All @@ -329,7 +329,7 @@ CreateXMLRetValues createXML(int ruleCount, const string &source, const string &
ret.ruleAlignment = TrimInternal(ret.ruleAlignment);

vector<string> ruleAlignmentToks = Tokenize(ret.ruleAlignment);
for (int i = 0; i < ruleAlignmentToks.size(); ++i) {
for (size_t i = 0; i < ruleAlignmentToks.size(); ++i) {
const string &alignPoint = ruleAlignmentToks[i];
vector<string> toks = Tokenize(alignPoint, "-");
assert(toks.size() == 2);
Expand All @@ -338,15 +338,15 @@ CreateXMLRetValues createXML(int ruleCount, const string &source, const string &
ret.ruleAlignmentInv = TrimInternal(ret.ruleAlignmentInv);

// frame
ret.frame;
// ret.frame;
if (frameInput.find(-1) == frameInput.end())
ret.frame = frameInput[-1];

int currently_included = 0;
int start_t = -1;
targetBitmap.push_back(0);

for (int t = 0 ; t <= targetsToks.size() ; t++ ) {
for (size_t t = 0 ; t <= targetsToks.size() ; t++ ) {
// beginning of tm target inclusion
if ( !currently_included && targetBitmap[t] ) {
start_t = t;
Expand All @@ -360,7 +360,7 @@ CreateXMLRetValues createXML(int ruleCount, const string &source, const string &
if ( start_t >= 0 ) {
string target = "";
//cerr << "for(tt=$start_t;tt<$t+$TARGET_BITMAP[$t]);\n";
for (int tt = start_t ; tt < t + targetBitmap[t] ; tt++ ) {
for (size_t tt = start_t ; tt < t + targetBitmap[t] ; tt++ ) {
target += targetsToks[tt] + " ";
}
// target = Trim(target); TODO
Expand Down

0 comments on commit 24d740d

Please sign in to comment.