Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
fa36b60
commit 9b0923d
Showing
7 changed files
with
170 additions
and
296 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,133 +1,86 @@ | ||
#include <iostream> // std::cout | ||
#include <fstream> // std::ifstream | ||
#include<string> | ||
#include<sstream> | ||
#include<vector> | ||
#include<map> | ||
|
||
#include "Desegmenter.h" | ||
#include <boost/algorithm/string/replace.hpp> | ||
|
||
using namespace std; | ||
|
||
#include <fstream> | ||
#include <iostream> | ||
#include<string> | ||
#include<sstream> | ||
#include<vector> | ||
#include<map> | ||
#include "Desegmenter.h" | ||
#include <boost/algorithm/string/replace.hpp> | ||
|
||
using namespace std; | ||
|
||
namespace Moses | ||
{ | ||
void Desegmenter::Load(const string filename){ | ||
|
||
std::ifstream myFile(filename.c_str() );//, std::ifstream::in); | ||
if (myFile.is_open()){ | ||
cerr << "Desegmentation File open successful." << endl; | ||
string line; | ||
while (getline(myFile, line)){ | ||
stringstream ss(line); | ||
string token; | ||
vector<string> myline; | ||
while (getline(ss, token, '\t')){ | ||
myline.push_back(token); | ||
} | ||
mmDesegTable.insert(pair<string, string>(myline[2], myline[1] )); | ||
} | ||
myFile.close(); | ||
} | ||
else | ||
cerr << "open() failed: check if Desegmentation file is in right folder" << endl; | ||
} | ||
|
||
|
||
|
||
|
||
// Returns every value stored in mmDesegTable under `myKey`.
// When the key has no entry, the result instead holds the single string
// produced by ApplyRules(myKey).
//
// equal_range() is used so that exactly the run of entries for `myKey` is
// visited; find() on a multimap is not required to return the first element
// of that run, so counting forward from it could step onto other keys.
vector<string> Desegmenter::Search(string myKey){
  typedef multimap<string, string>::const_iterator TableIter;

  vector<string> result;
  const pair<TableIter, TableIter> matches = mmDesegTable.equal_range(myKey);

  if (matches.first == matches.second){
    // No table entry: fall back to the rule-based form.
    result.push_back(ApplyRules(myKey));
    return result;
  }

  for (TableIter it = matches.first; it != matches.second; ++it){
    result.push_back(it->second);
  }
  return result;
}
|
||
// Returns a rewritten copy of `segToken`; the argument itself is not modified.
//
// Each {pattern, replacement} pair below is applied with boost::replace_all
// over the whole string, strictly in the listed order, so later rules operate
// on the output of earlier ones.
string Desegmenter::ApplyRules(string & segToken){
  const string rules[][2] = {
    {"l+ All", "ll"},
    {"l+ Al", "ll"},
    {"y+ y ", "y"},
    {"p+ ", "t"},
    {"' +", "}"},
    {"y +", "A"},
    {"n +n", "n"},
    {"mn +m", "mm"},
    {"En +m", "Em"},
    // NOTE(review): same replacement as the rule above - possibly a
    // copy/paste slip; confirm the intended output for "An +lA".
    {"An +lA", "Em"},
    {"-LRB-", "("},
    {"-RRB-", ")"},
    // Finally strip whatever '+' segment markers are left.
    {"+ +", ""},
    {"+ ", ""},
    {" +", ""}
  };
  const size_t ruleCount = sizeof(rules) / sizeof(rules[0]);

  string desegToken(segToken);
  for (size_t i = 0; i < ruleCount; ++i){
    boost::replace_all(desegToken, rules[i][0], rules[i][1]);
  }
  return desegToken;
}
|
||
|
||
// Destructor: performs no explicit cleanup of its own.
Desegmenter::~Desegmenter() {}
|
||
|
||
|
||
/* | ||
void Completer::Load(const string filename){ | ||
std::ifstream myFile(filename.c_str() ); | ||
if (myFile.is_open()){ | ||
cerr << "Completer File open successful." << endl; | ||
string line; | ||
while (getline(myFile, line)){ | ||
stringstream ss(line); | ||
string token; | ||
vector<string> myline; | ||
while (getline(ss, token, '\t')){ | ||
myline.push_back(token); | ||
} | ||
mmDetok.insert(pair<string, string>(myline[0], myline[1] )); | ||
} | ||
myFile.close(); | ||
} | ||
else | ||
cerr << "open() failed: check if Desegmentation file is in right folder" << endl; | ||
//return mmDetok; | ||
} | ||
string Completer::Search(string myKey){ | ||
//unordered_multimap<string, string>::const_iterator mmiPairFound = mmDetok.find(myKey); | ||
map<string, string>::const_iterator mi = mmDetok.find(myKey); | ||
//vector<string> result; | ||
string result=""; | ||
if (mi != mmDetok.end()){ | ||
result=mi->second; | ||
return result; | ||
} | ||
else{ | ||
return result; | ||
} | ||
} | ||
Completer::~Completer() | ||
{} | ||
*/ | ||
|
||
} | ||
{ | ||
void Desegmenter::Load(const string filename){ | ||
|
||
std::ifstream myFile(filename.c_str() ); | ||
if (myFile.is_open()){ | ||
cerr << "Desegmentation File open successful." << endl; | ||
string line; | ||
while (getline(myFile, line)){ | ||
stringstream ss(line); | ||
string token; | ||
vector<string> myline; | ||
while (getline(ss, token, '\t')){ | ||
myline.push_back(token); | ||
} | ||
mmDesegTable.insert(pair<string, string>(myline[2], myline[1] )); | ||
} | ||
myFile.close(); | ||
} | ||
else | ||
cerr << "open() failed: check if Desegmentation file is in right folder" << endl; | ||
} | ||
|
||
|
||
// Returns every value stored in mmDesegTable under `myKey`.
// When the key has no entry, the result instead holds the single string
// produced by ApplyRules(myKey).
//
// equal_range() is used so that exactly the run of entries for `myKey` is
// visited; find() on a multimap is not required to return the first element
// of that run, so counting forward from it could step onto other keys.
vector<string> Desegmenter::Search(string myKey){
  typedef multimap<string, string>::const_iterator TableIter;

  vector<string> result;
  const pair<TableIter, TableIter> matches = mmDesegTable.equal_range(myKey);

  if (matches.first == matches.second){
    // No table entry: fall back to the rule-based form.
    result.push_back(ApplyRules(myKey));
    return result;
  }

  for (TableIter it = matches.first; it != matches.second; ++it){
    result.push_back(it->second);
  }
  return result;
}
|
||
|
||
// Returns a rewritten copy of `segToken`; the argument itself is not modified.
//
// Rules are {pattern, replacement} pairs applied with boost::replace_all over
// the whole string, strictly in the listed order. The first table is skipped
// when the `simple` member is true; the marker-stripping table always runs.
string Desegmenter::ApplyRules(string & segToken){
  const string schemeRules[][2] = {
    {"l+ All", "ll"},
    {"l+ Al", "ll"},
    {"y+ y ", "y"},
    {"p+ ", "t"},
    {"' +", "}"},
    {"y +", "A"},
    {"n +n", "n"},
    {"mn +m", "mm"},
    {"En +m", "Em"},
    // NOTE(review): same replacement as the rule above - possibly a
    // copy/paste slip; confirm the intended output for "An +lA".
    {"An +lA", "Em"},
    {"-LRB-", "("},
    {"-RRB-", ")"}
  };
  const size_t schemeRuleCount = sizeof(schemeRules) / sizeof(schemeRules[0]);

  // Removes the remaining '+' segment markers.
  const string markerRules[][2] = {
    {"+ +", ""},
    {"+ ", ""},
    {" +", ""}
  };
  const size_t markerRuleCount = sizeof(markerRules) / sizeof(markerRules[0]);

  string desegToken(segToken);
  if (!simple){
    for (size_t i = 0; i < schemeRuleCount; ++i){
      boost::replace_all(desegToken, schemeRules[i][0], schemeRules[i][1]);
    }
  }
  for (size_t i = 0; i < markerRuleCount; ++i){
    boost::replace_all(desegToken, markerRules[i][0], markerRules[i][1]);
  }
  return desegToken;
}
|
||
// Destructor: performs no explicit cleanup of its own.
Desegmenter::~Desegmenter() {}
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,52 +1,31 @@ | ||
#pragma once | ||
|
||
#include<string> | ||
#include<map> | ||
|
||
|
||
using namespace std; | ||
|
||
#pragma once | ||
|
||
#include<string> | ||
#include<map> | ||
|
||
|
||
using namespace std; | ||
|
||
namespace Moses | ||
{ | ||
class Desegmenter | ||
{ | ||
private: | ||
{ | ||
class Desegmenter | ||
{ | ||
private: | ||
std::multimap<string, string> mmDesegTable; | ||
std::string filename; | ||
void Load(const string filename); | ||
|
||
public: | ||
Desegmenter(const std::string& file){ | ||
filename = file; | ||
Load(filename);//, mmDetok); | ||
} | ||
// Returns a copy of the path this object was constructed with.
// Declared const so it can also be called on a const Desegmenter.
string getFileName() const { return filename; }
|
||
vector<string> Search(string myKey); | ||
string ApplyRules(string &); | ||
|
||
~Desegmenter(); | ||
}; | ||
|
||
|
||
/*class Completer | ||
{ | ||
private: | ||
//std::multimap<string, string,std::less< std::string > > mmDetok; | ||
std::map<string, string> mmDetok; | ||
std::string filename; | ||
void Load(const string filename); | ||
public: | ||
Completer(const std::string& file){ | ||
filename = file; | ||
Load(filename);//, mmDetok); | ||
} | ||
string getFileName(){ return filename; } | ||
string Search(string myKey); | ||
~Completer(); | ||
}; | ||
*/ | ||
|
||
} | ||
std::string filename; | ||
bool simple; | ||
void Load(const string filename); | ||
|
||
public: | ||
Desegmenter(const std::string& file, const bool scheme){ | ||
filename = file; | ||
simple=scheme; | ||
Load(filename); | ||
} | ||
// Returns a copy of the path this object was constructed with.
// Declared const so it can also be called on a const Desegmenter.
string getFileName() const { return filename; }
|
||
vector<string> Search(string myKey); | ||
string ApplyRules(string &); | ||
~Desegmenter(); | ||
}; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.