Skip to content

Commit

Permalink
added dese-schemeoption;
Browse files Browse the repository at this point in the history
  • Loading branch information
msalameh83 committed Apr 4, 2016
1 parent fa36b60 commit 9b0923d
Show file tree
Hide file tree
Showing 7 changed files with 170 additions and 296 deletions.
217 changes: 85 additions & 132 deletions moses/FF/Dsg-Feature/Desegmenter.cpp
@@ -1,133 +1,86 @@
#include <iostream> // std::cout
#include <fstream> // std::ifstream
#include<string>
#include<sstream>
#include<vector>
#include<map>

#include "Desegmenter.h"
#include <boost/algorithm/string/replace.hpp>

using namespace std;

#include <fstream>
#include <iostream>
#include<string>
#include<sstream>
#include<vector>
#include<map>
#include "Desegmenter.h"
#include <boost/algorithm/string/replace.hpp>

using namespace std;

namespace Moses
{
/**
 * Reads the tab-separated desegmentation table from `filename` into
 * mmDesegTable.
 *
 * For every line, the third column (index 2) becomes the key and the second
 * column (index 1) becomes the mapped value. Lines with fewer than three
 * columns (including blank lines) are skipped instead of being indexed out
 * of bounds; the number of skipped lines is reported on stderr.
 *
 * A file that cannot be opened is only reported on stderr; nothing is
 * inserted in that case.
 *
 * @param filename  path of the desegmentation table file.
 */
void Desegmenter::Load(const string filename){
  std::ifstream myFile(filename.c_str());
  if (!myFile.is_open()){
    cerr << "open() failed: check if Desegmentation file is in right folder" << endl;
    return;
  }
  cerr << "Desegmentation File open successful." << endl;

  size_t skippedLines = 0;
  string line;
  while (getline(myFile, line)){
    // Split the line on tabs.
    stringstream ss(line);
    string token;
    vector<string> fields;
    while (getline(ss, token, '\t')){
      fields.push_back(token);
    }

    // Columns 1 and 2 are read below, so at least three fields are required.
    if (fields.size() < 3){
      ++skippedLines;
      continue;
    }
    mmDesegTable.insert(pair<string, string>(fields[2], fields[1]));
  }

  if (skippedLines != 0){
    cerr << "Desegmentation File: skipped " << skippedLines
         << " line(s) with fewer than 3 tab-separated fields." << endl;
  }
  // myFile is closed by its destructor when it goes out of scope.
}




/**
 * Looks up every desegmented form stored for myKey.
 *
 * @param myKey  segmented token string used as the table key.
 * @return all values stored under myKey in mmDesegTable; when the key is
 *         absent, a single-element vector holding ApplyRules(myKey).
 */
vector<string> Desegmenter::Search(string myKey){
  typedef multimap<string, string>::const_iterator TableIter;

  // equal_range yields the complete run of entries whose key equals myKey in
  // one lookup. multimap::find() may return any element of that run, so
  // stepping count() times from its result is not guaranteed to cover it.
  const pair<TableIter, TableIter> matches = mmDesegTable.equal_range(myKey);

  vector<string> result;
  if (matches.first == matches.second){
    // No table entry: fall back to the rule-based desegmentation.
    result.push_back(ApplyRules(myKey));
    return result;
  }

  for (TableIter it = matches.first; it != matches.second; ++it){
    result.push_back(it->second);
  }
  return result;
}

/**
 * Rule-based desegmentation of one token string.
 *
 * Works on a copy of segToken (the argument itself is not modified) and runs
 * a fixed list of literal substring rewrites through boost::replace_all, in
 * exactly the order listed in the table below. The specific rewrites come
 * first; the generic "+ +", "+ " and " +" removals run last.
 *
 * @param segToken  segmented input text.
 * @return the rewritten copy.
 */
string Desegmenter::ApplyRules(string & segToken){
  struct Rewrite { const char* from; const char* to; };

  // Order is significant: each rule sees the output of the ones before it.
  static const Rewrite kRewrites[] = {
    { "l+ All", "ll" },
    { "l+ Al",  "ll" },
    { "y+ y ",  "y"  },
    { "p+ ",    "t"  },
    { "' +",    "}"  },
    { "y +",    "A"  },
    { "n +n",   "n"  },
    { "mn +m",  "mm" },
    { "En +m",  "Em" },
    // NOTE(review): same replacement text as the rule above -- confirm intended.
    { "An +lA", "Em" },
    { "-LRB-",  "("  },
    { "-RRB-",  ")"  },
    { "+ +",    ""   },
    { "+ ",     ""   },
    { " +",     ""   }
  };
  const size_t rewriteCount = sizeof(kRewrites) / sizeof(kRewrites[0]);

  string rewritten(segToken);
  for (size_t i = 0; i < rewriteCount; ++i){
    boost::replace_all(rewritten, string(kRewrites[i].from), string(kRewrites[i].to));
  }
  return rewritten;
}


// Destructor with an empty body: no explicit cleanup is performed here.
Desegmenter::~Desegmenter()
{}



/*
void Completer::Load(const string filename){
std::ifstream myFile(filename.c_str() );
if (myFile.is_open()){
cerr << "Completer File open successful." << endl;
string line;
while (getline(myFile, line)){
stringstream ss(line);
string token;
vector<string> myline;
while (getline(ss, token, '\t')){
myline.push_back(token);
}
mmDetok.insert(pair<string, string>(myline[0], myline[1] ));
}
myFile.close();
}
else
cerr << "open() failed: check if Desegmentation file is in right folder" << endl;
//return mmDetok;
}
string Completer::Search(string myKey){
//unordered_multimap<string, string>::const_iterator mmiPairFound = mmDetok.find(myKey);
map<string, string>::const_iterator mi = mmDetok.find(myKey);
//vector<string> result;
string result="";
if (mi != mmDetok.end()){
result=mi->second;
return result;
}
else{
return result;
}
}
Completer::~Completer()
{}
*/

}
{
/**
 * Reads the tab-separated desegmentation table from `filename` into
 * mmDesegTable.
 *
 * For every line, the third column (index 2) becomes the key and the second
 * column (index 1) becomes the mapped value. Lines with fewer than three
 * columns (including blank lines) are skipped instead of being indexed out
 * of bounds; the number of skipped lines is reported on stderr.
 *
 * A file that cannot be opened is only reported on stderr; nothing is
 * inserted in that case.
 *
 * @param filename  path of the desegmentation table file.
 */
void Desegmenter::Load(const string filename){
  std::ifstream myFile(filename.c_str());
  if (!myFile.is_open()){
    cerr << "open() failed: check if Desegmentation file is in right folder" << endl;
    return;
  }
  cerr << "Desegmentation File open successful." << endl;

  size_t skippedLines = 0;
  string line;
  while (getline(myFile, line)){
    // Split the line on tabs.
    stringstream ss(line);
    string token;
    vector<string> fields;
    while (getline(ss, token, '\t')){
      fields.push_back(token);
    }

    // Columns 1 and 2 are read below, so at least three fields are required.
    if (fields.size() < 3){
      ++skippedLines;
      continue;
    }
    mmDesegTable.insert(pair<string, string>(fields[2], fields[1]));
  }

  if (skippedLines != 0){
    cerr << "Desegmentation File: skipped " << skippedLines
         << " line(s) with fewer than 3 tab-separated fields." << endl;
  }
  // myFile is closed by its destructor when it goes out of scope.
}


/**
 * Looks up every desegmented form stored for myKey.
 *
 * @param myKey  segmented token string used as the table key.
 * @return all values stored under myKey in mmDesegTable; when the key is
 *         absent, a single-element vector holding ApplyRules(myKey).
 */
vector<string> Desegmenter::Search(string myKey){
  typedef multimap<string, string>::const_iterator TableIter;

  // equal_range yields the complete run of entries whose key equals myKey in
  // one lookup. multimap::find() may return any element of that run, so
  // stepping count() times from its result is not guaranteed to cover it.
  const pair<TableIter, TableIter> matches = mmDesegTable.equal_range(myKey);

  vector<string> result;
  if (matches.first == matches.second){
    // No table entry: fall back to the rule-based desegmentation.
    result.push_back(ApplyRules(myKey));
    return result;
  }

  for (TableIter it = matches.first; it != matches.second; ++it){
    result.push_back(it->second);
  }
  return result;
}


/**
 * Rule-based desegmentation of one token string.
 *
 * Works on a copy of segToken (the argument itself is not modified) and runs
 * literal substring rewrites through boost::replace_all in a fixed order:
 *   1. when the `simple` member is false, the scheme-specific rewrites;
 *   2. always, the generic "+ +", "+ " and " +" removals.
 *
 * @param segToken  segmented input text.
 * @return the rewritten copy.
 */
string Desegmenter::ApplyRules(string & segToken){
  struct Rewrite { const char* from; const char* to; };

  // Applied only when `simple` is false. Order is significant: each rule
  // sees the output of the ones before it.
  static const Rewrite kSchemeRewrites[] = {
    { "l+ All", "ll" },
    { "l+ Al",  "ll" },
    { "y+ y ",  "y"  },
    { "p+ ",    "t"  },
    { "' +",    "}"  },
    { "y +",    "A"  },
    { "n +n",   "n"  },
    { "mn +m",  "mm" },
    { "En +m",  "Em" },
    // NOTE(review): same replacement text as the rule above -- confirm intended.
    { "An +lA", "Em" },
    { "-LRB-",  "("  },
    { "-RRB-",  ")"  }
  };
  // Always applied, after the scheme-specific rules.
  static const Rewrite kMarkerRewrites[] = {
    { "+ +", "" },
    { "+ ",  "" },
    { " +",  "" }
  };
  const size_t schemeCount = sizeof(kSchemeRewrites) / sizeof(kSchemeRewrites[0]);
  const size_t markerCount = sizeof(kMarkerRewrites) / sizeof(kMarkerRewrites[0]);

  string rewritten(segToken);
  if (!simple){
    for (size_t i = 0; i < schemeCount; ++i){
      boost::replace_all(rewritten, string(kSchemeRewrites[i].from), string(kSchemeRewrites[i].to));
    }
  }
  for (size_t i = 0; i < markerCount; ++i){
    boost::replace_all(rewritten, string(kMarkerRewrites[i].from), string(kMarkerRewrites[i].to));
  }
  return rewritten;
}

// Destructor with an empty body: no explicit cleanup is performed here.
Desegmenter::~Desegmenter()
{}

}
79 changes: 29 additions & 50 deletions moses/FF/Dsg-Feature/Desegmenter.h
@@ -1,52 +1,31 @@
#pragma once

#include<string>
#include<map>


using namespace std;

#pragma once

#include<string>
#include<map>


using namespace std;

namespace Moses
{
class Desegmenter
{
private:
{
class Desegmenter
{
private:
std::multimap<string, string> mmDesegTable;
std::string filename;
void Load(const string filename);

public:
Desegmenter(const std::string& file){
filename = file;
Load(filename);//, mmDetok);
}
string getFileName(){ return filename; }

vector<string> Search(string myKey);
string ApplyRules(string &);

~Desegmenter();
};


/*class Completer
{
private:
//std::multimap<string, string,std::less< std::string > > mmDetok;
std::map<string, string> mmDetok;
std::string filename;
void Load(const string filename);
public:
Completer(const std::string& file){
filename = file;
Load(filename);//, mmDetok);
}
string getFileName(){ return filename; }
string Search(string myKey);
~Completer();
};
*/

}
std::string filename;
bool simple;
void Load(const string filename);

public:
Desegmenter(const std::string& file, const bool scheme){
filename = file;
simple=scheme;
Load(filename);
}
string getFileName(){ return filename; }

vector<string> Search(string myKey);
string ApplyRules(string &);
~Desegmenter();
};
}
39 changes: 20 additions & 19 deletions moses/FF/Dsg-Feature/DsgModel.cpp
Expand Up @@ -28,23 +28,23 @@ namespace Moses
// Builds the desegmentation language model (DSGM) from m_lmPath and creates
// the Desegmenter table object (desegT) from m_desegPath.
// The lmFile argument is not read in this body, and startState is not used
// after it is initialised.
// NOTE(review): this span comes from a rendered diff, so the startState
// declaration and the desegT assignment each appear twice; the later pair
// (which also passes m_simple) is presumably the post-commit code -- confirm.
void DesegModel :: readLanguageModel(const char *lmFile)
{
DSGM = ConstructDsgLM(m_lmPath.c_str());
State startState = DSGM->NullContextState(); // MSAL
desegT=new Desegmenter(m_desegPath);// Desegmentation Table
State startState = DSGM->NullContextState();
desegT=new Desegmenter(m_desegPath,m_simple);// Desegmentation Table
}


// Stores the options pointer in m_options, then loads the model by calling
// readLanguageModel() with m_lmPath.
// NOTE(review): the m_options assignment appears twice because this span
// shows a removed and an added diff line together -- confirm and keep one.
void DesegModel::Load(AllOptions::ptr const& opts)
{
m_options = opts; //ADDED
m_options = opts;
readLanguageModel(m_lmPath.c_str());
}



void DesegModel:: EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const
{

dsgHypothesis obj;
Expand All @@ -62,16 +62,14 @@ namespace Moses
obj.calculateDsgProbinIsol(*DSGM,*desegT,align);
obj.populateScores(scores,numFeatures);
estimatedScores.PlusEquals(this, scores);

}


FFState* DesegModel::EvaluateWhenApplied(
const Hypothesis& cur_hypo,
const FFState* prev_state,
ScoreComponentCollection* accumulator) const
const Hypothesis& cur_hypo,
const FFState* prev_state,
ScoreComponentCollection* accumulator) const
{

const TargetPhrase &target = cur_hypo.GetCurrTargetPhrase();
const Range &src_rng =cur_hypo.GetCurrSourceWordsRange();
const AlignmentInfo &align = cur_hypo.GetCurrTargetPhrase().GetAlignTerm();
Expand All @@ -97,20 +95,18 @@ namespace Moses
}

// Chart-decoding overload: always raises via UTIL_THROW2 and never returns a state.
// NOTE(review): the three parameter lines appear twice because this span shows
// removed and added diff lines together (presumably a whitespace-only change) -- confirm.
// NOTE(review): the message text names UTIL_THROW2 itself; it presumably was
// meant to name this feature -- confirm before changing the string.
FFState* DesegModel::EvaluateWhenApplied(
const ChartHypothesis& /* cur_hypo */,
int /* featureID - used to index the state in the previous hypotheses */,
ScoreComponentCollection* accumulator) const
const ChartHypothesis& /* cur_hypo */,
int /* featureID - used to index the state in the previous hypotheses */,
ScoreComponentCollection* accumulator) const
{
UTIL_THROW2("Chart decoding not support by UTIL_THROW2");

}

// Creates the feature state for an empty hypothesis: a heap-allocated dsgState
// built from the language model's begin-of-sentence state. The `input`
// argument is not read. The returned pointer comes from `new`.
const FFState* DesegModel::EmptyHypothesisState(const InputType &input) const
{
  VERBOSE(3,"DesegModel::EmptyHypothesisState()" << endl);

  State beginOfSentence = DSGM->BeginSentenceState();
  dsgState initial = dsgState(beginOfSentence);
  return new dsgState(initial);
}

Expand All @@ -134,11 +130,16 @@ namespace Moses
tFactor = Scan<int>(value);
} else if (key == "optimistic") {
if (value == "n")
optimistic = 0;
optimistic = 0;
else
optimistic = 1;
optimistic = 1;
} else if (key == "deseg-path") {
m_desegPath = value;
m_desegPath = Scan<int>(value);
} else if (key == "deseg-scheme") {
if(value == "s")
m_simple = 1;
else
m_simple = 0;
} else if (key == "order") {
order = Scan<int>(value);
} else {
Expand Down

0 comments on commit 9b0923d

Please sign in to comment.