Skip to content

Commit

Permalink
merged master into dynamic-models and solved conflicts
Browse files Browse the repository at this point in the history
  • Loading branch information
nicolabertoldi committed Dec 13, 2014
2 parents cea2d9d + a0b6b6a commit e4eb201
Show file tree
Hide file tree
Showing 867 changed files with 62,413 additions and 20,421 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -1,3 +1,4 @@
tools
*.d
*.pyc
*.lo
Expand Down Expand Up @@ -79,3 +80,4 @@ nbproject/
mingw/MosesGUI/MosesGUI.e4p
mingw/MosesGUI/_eric4project/

contrib/m4m/merge-sorted
1 change: 1 addition & 0 deletions BUILD-INSTRUCTIONS.txt
@@ -1,3 +1,4 @@
Please see the Moses website on how to compile and run Moses
http://www.statmt.org/moses/?n=Development.GetStarted


71 changes: 65 additions & 6 deletions Jamroot
Expand Up @@ -70,14 +70,15 @@
#-a to build from scratch
#-j$NCPUS to compile in parallel
#--clean to clean
#--debug-build to build with Og. Only available with gcc 4.8+

import option ;
import modules ;
import path ;
path-constant TOP : . ;
include $(TOP)/jam-files/sanity.jam ;

boost 103600 ;
boost 104400 ;
external-lib z ;

lib dl : : <runtime-link>static:<link>static <runtime-link>shared:<link>shared ;
Expand All @@ -97,6 +98,11 @@ if ! [ option.get "without-tcmalloc" : : "yes" ] && [ test_library "tcmalloc_min
echo "Tip: install tcmalloc for faster threading. See BUILD-INSTRUCTIONS.txt for more information." ;
}

# Optional debug build: when --debug-build is given on the command line,
# append -Og to the C++ compiler flags collected in `requirements` and
# announce it. -Og is only understood by gcc 4.8 and later (see the message).
if [ option.get "debug-build" : : "yes" ] {
requirements += <cxxflags>-Og ;
echo "Building with -Og to enable easier profiling and debugging. Only available on gcc 4.8+." ;
}

if [ option.get "enable-mpi" : : "yes" ] {
import mpi ;
using mpi ;
Expand All @@ -114,10 +120,37 @@ requirements += [ option.get "with-mm" : : <define>PT_UG ] ;
requirements += [ option.get "with-mm" : : <define>MAX_NUM_FACTORS=4 ] ;
requirements += [ option.get "unlabelled-source" : : <define>UNLABELLED_SOURCE ] ;

if [ option.get "with-cmph" ] {
if [ option.get "with-oxlm" ] {
external-lib boost_serialization ;
external-lib gomp ;
requirements += <library>boost_serialization ;
requirements += <library>gomp ;
}

if [ option.get "with-cmph" : : "yes" ] {
requirements += <define>HAVE_CMPH ;
}

if [ option.get "with-icu" : : "yes" ]
{
external-lib icuuc ;
external-lib icuio ;
external-lib icui18n ;
requirements += <library>icuuc/<link>shared ;
requirements += <library>icuio/<link>shared ;
requirements += <library>icui18n/<link>shared ;
requirements += <cxxflags>-fPIC ;
requirements += <address-model>64 ;
requirements += <runtime-link>shared ;
}

if [ option.get "with-probing-pt" : : "yes" ]
{
external-lib boost_serialization ;
requirements += <define>HAVE_PROBINGPT ;
requirements += <library>boost_serialization ;
}

project : default-build
<threading>multi
<warnings>on
Expand All @@ -140,17 +173,21 @@ project : requirements
;

#Add directories here if you want their incidental targets too (i.e. tests).
build-projects lm util phrase-extract search moses moses/LM mert moses-cmd moses-chart-cmd mira scripts regression-testing ;
build-projects lm util phrase-extract phrase-extract/syntax-common search moses moses/LM mert moses-cmd mira scripts regression-testing ;

if [ option.get "with-mm" : : "yes" ]
{
alias mm :
moses/TranslationModel/UG//ptable-describe-features
moses/TranslationModel/UG//count-ptable-features
moses/TranslationModel/UG//ptable-lookup
moses/TranslationModel/UG//spe-check-coverage
moses/TranslationModel/UG/mm//mtt-demo1
moses/TranslationModel/UG/mm//mtt-build
moses/TranslationModel/UG/mm//mtt-dump
moses/TranslationModel/UG/mm//symal2mam
moses/TranslationModel/UG/mm//mam2symal
moses/TranslationModel/UG/mm//mam_verify
moses/TranslationModel/UG/mm//custom-pt
moses/TranslationModel/UG/mm//mmlex-build
moses/TranslationModel/UG/mm//mmlex-lookup
moses/TranslationModel/UG/mm//mtt-count-words
Expand All @@ -163,9 +200,19 @@ else
alias mm ;
}

if [ option.get "with-rephraser" : : "yes" ]
{
alias rephraser :
contrib/rephraser//paraphrase
;
}
else
{
alias rephraser ;
}

alias programs :
lm//programs
moses-chart-cmd//moses_chart
moses-cmd//programs
OnDiskPt//CreateOnDiskPt
OnDiskPt//queryOnDiskPt
Expand All @@ -177,15 +224,19 @@ phrase-extract//lexical-reordering
phrase-extract//extract-ghkm
phrase-extract//pcfg-extract
phrase-extract//pcfg-score
phrase-extract//extract-mixed-syntax
phrase-extract//score-stsg
phrase-extract//filter-rule-table
biconcor
mira//mira
contrib/server//mosesserver
mm
rephraser
;


install-bin-libs programs ;
install-headers headers-base : [ path.glob-tree biconcor contrib lm mert misc moses-chart-cmd moses-cmd OnDiskPt phrase-extract symal util : *.hh *.h ] : . ;
install-headers headers-base : [ path.glob-tree biconcor contrib lm mert misc moses-cmd OnDiskPt phrase-extract symal util : *.hh *.h ] : . ;
install-headers headers-moses : moses//headers-to-install : moses ;

alias install : prefix-bin prefix-lib headers-base headers-moses ;
Expand All @@ -199,3 +250,11 @@ if [ path.exists $(TOP)/dist ] && $(prefix) != dist {
echo "To disable this message, delete $(TOP)/dist ." ;
echo ;
}

#local temp = [ _shell "bash source ./s.sh" ] ;
# Top-level shell calls that make bin/moses_chart a symbolic link to moses:
# create bin/, remove any existing bin/moses_chart, then link it inside bin/.
# The value captured in `temp` is not used in the lines shown here.
# NOTE(review): presumably keeps the old moses_chart command name working
# now that the chart decoder is served by the moses binary — confirm.
local temp = [ _shell "mkdir bin" ] ;
local temp = [ _shell "rm bin/moses_chart" ] ;
local temp = [ _shell "cd bin && ln -s moses moses_chart" ] ;



5 changes: 0 additions & 5 deletions NOTICE

This file was deleted.

4 changes: 2 additions & 2 deletions OnDiskPt/Jamfile
@@ -1,5 +1,5 @@
fakelib OnDiskPt : OnDiskWrapper.cpp SourcePhrase.cpp TargetPhrase.cpp Word.cpp Phrase.cpp PhraseNode.cpp TargetPhraseCollection.cpp Vocab.cpp OnDiskQuery.cpp ../moses//headers ;

exe CreateOnDiskPt : Main.cpp ../moses//moses OnDiskPt ;
exe queryOnDiskPt : queryOnDiskPt.cpp ../moses//moses OnDiskPt ;
exe CreateOnDiskPt : Main.cpp ..//boost_filesystem ../moses//moses OnDiskPt ;
exe queryOnDiskPt : queryOnDiskPt.cpp ..//boost_filesystem ../moses//moses OnDiskPt ;

44 changes: 21 additions & 23 deletions OnDiskPt/Main.cpp
Expand Up @@ -66,10 +66,9 @@ int main (int argc, char * const argv[])

PhraseNode &rootNode = onDiskWrapper.GetRootSourceNode();
size_t lineNum = 0;
char line[100000];
string line;

//while(getline(inStream, line))
while(inStream.getline(line, 100000)) {
while(getline(inStream, line)) {
lineNum++;
if (lineNum%1000 == 0) cerr << "." << flush;
if (lineNum%10000 == 0) cerr << ":" << flush;
Expand Down Expand Up @@ -107,8 +106,13 @@ bool Flush(const OnDiskPt::SourcePhrase *prevSourcePhrase, const OnDiskPt::Sourc
return ret;
}

OnDiskPt::PhrasePtr Tokenize(SourcePhrase &sourcePhrase, TargetPhrase &targetPhrase, char *line, OnDiskWrapper &onDiskWrapper, int numScores, vector<float> &misc)
OnDiskPt::PhrasePtr Tokenize(SourcePhrase &sourcePhrase, TargetPhrase &targetPhrase, const std::string &lineStr, OnDiskWrapper &onDiskWrapper, int numScores, vector<float> &misc)
{
char line[lineStr.size() + 1];
strcpy(line, lineStr.c_str());

stringstream sparseFeatures, property;

size_t scoreInd = 0;

// MAIN LOOP
Expand All @@ -118,6 +122,7 @@ OnDiskPt::PhrasePtr Tokenize(SourcePhrase &sourcePhrase, TargetPhrase &targetPhr
2 = scores
3 = align
4 = count
7 = properties
*/
char *tok = strtok (line," ");
OnDiskPt::PhrasePtr out(new Phrase());
Expand Down Expand Up @@ -148,29 +153,20 @@ OnDiskPt::PhrasePtr Tokenize(SourcePhrase &sourcePhrase, TargetPhrase &targetPhr
targetPhrase.CreateAlignFromString(tok);
break;
}
case 4:
++stage;
break;
/* case 5: {
// count info. Only store the 2nd one
float val = Moses::Scan<float>(tok);
misc[0] = val;
++stage;
break;
}*/
case 4: {
// store only the 3rd one (rule count)
float val = Moses::Scan<float>(tok);
misc[0] = val;
break;
}
case 5: {
// count info. Only store the 2nd one
//float val = Moses::Scan<float>(tok);
//misc[0] = val;
++stage;
// sparse features
sparseFeatures << tok << " ";
break;
}
case 6: {
// store only the 3rd one (rule count)
float val = Moses::Scan<float>(tok);
misc[0] = val;
++stage;
break;
property << tok << " ";
break;
}
default:
cerr << "ERROR in line " << line << endl;
Expand All @@ -183,6 +179,8 @@ OnDiskPt::PhrasePtr Tokenize(SourcePhrase &sourcePhrase, TargetPhrase &targetPhr
} // while (tok != NULL)

assert(scoreInd == numScores);
targetPhrase.SetSparseFeatures(Moses::Trim(sparseFeatures.str()));
targetPhrase.SetProperty(Moses::Trim(property.str()));
targetPhrase.SortAlign();
return out;
} // Tokenize()
Expand Down
2 changes: 1 addition & 1 deletion OnDiskPt/Main.h
Expand Up @@ -29,7 +29,7 @@ OnDiskPt::WordPtr Tokenize(OnDiskPt::Phrase &phrase
, const std::string &token, bool addSourceNonTerm, bool addTargetNonTerm
, OnDiskPt::OnDiskWrapper &onDiskWrapper, int retSourceTarget);
OnDiskPt::PhrasePtr Tokenize(OnDiskPt::SourcePhrase &sourcePhrase, OnDiskPt::TargetPhrase &targetPhrase
, char *line, OnDiskPt::OnDiskWrapper &onDiskWrapper
, const std::string &lineStr, OnDiskPt::OnDiskWrapper &onDiskWrapper
, int numScores
, std::vector<float> &misc);

Expand Down
2 changes: 1 addition & 1 deletion OnDiskPt/OnDiskWrapper.cpp
Expand Up @@ -31,7 +31,7 @@ using namespace std;
namespace OnDiskPt
{

int OnDiskWrapper::VERSION_NUM = 5;
int OnDiskWrapper::VERSION_NUM = 7;

OnDiskWrapper::OnDiskWrapper()
{
Expand Down
70 changes: 65 additions & 5 deletions OnDiskPt/TargetPhrase.cpp
Expand Up @@ -162,10 +162,14 @@ char *TargetPhrase::WriteOtherInfoToMemory(OnDiskWrapper &onDiskWrapper, size_t
// allocate mem
size_t numScores = onDiskWrapper.GetNumScores()
,numAlign = GetAlign().size();
size_t sparseFeatureSize = m_sparseFeatures.size();
size_t propSize = m_property.size();

size_t memNeeded = sizeof(UINT64); // file pos (phrase id)
memNeeded += sizeof(UINT64) + 2 * sizeof(UINT64) * numAlign; // align
memNeeded += sizeof(float) * numScores; // scores
size_t memNeeded = sizeof(UINT64) // file pos (phrase id)
+ sizeof(UINT64) + 2 * sizeof(UINT64) * numAlign // align
+ sizeof(float) * numScores // scores
+ sizeof(UINT64) + sparseFeatureSize // sparse features string
+ sizeof(UINT64) + propSize; // property string

char *mem = (char*) malloc(memNeeded);
//memset(mem, 0, memNeeded);
Expand All @@ -183,11 +187,33 @@ char *TargetPhrase::WriteOtherInfoToMemory(OnDiskWrapper &onDiskWrapper, size_t
// scores
memUsed += WriteScoresToMemory(mem + memUsed);

// sparse features
memUsed += WriteStringToMemory(mem + memUsed, m_sparseFeatures);

// property string
memUsed += WriteStringToMemory(mem + memUsed, m_property);

//DebugMem(mem, memNeeded);
assert(memNeeded == memUsed);
return mem;
}

size_t TargetPhrase::WriteStringToMemory(char *mem, const std::string &str) const
{
size_t memUsed = 0;
UINT64 *memTmp = (UINT64*) mem;

size_t strSize = str.size();
memTmp[0] = strSize;
memUsed += sizeof(UINT64);

const char *charStr = str.c_str();
memcpy(mem + memUsed, charStr, strSize);
memUsed += strSize;

return memUsed;
}

size_t TargetPhrase::WriteAlignToMemory(char *mem) const
{
size_t memUsed = 0;
Expand Down Expand Up @@ -231,7 +257,7 @@ Moses::TargetPhrase *TargetPhrase::ConvertToMoses(const std::vector<Moses::Facto
, const std::vector<float> &weightT
, bool isSyntax) const
{
Moses::TargetPhrase *ret = new Moses::TargetPhrase();
Moses::TargetPhrase *ret = new Moses::TargetPhrase(&phraseDict);

// words
size_t phraseSize = GetSize();
Expand Down Expand Up @@ -279,7 +305,14 @@ Moses::TargetPhrase *TargetPhrase::ConvertToMoses(const std::vector<Moses::Facto

// scores
ret->GetScoreBreakdown().Assign(&phraseDict, m_scores);
ret->Evaluate(mosesSP, phraseDict.GetFeaturesToApply());

// sparse features
ret->GetScoreBreakdown().Assign(&phraseDict, m_sparseFeatures);

// property
ret->SetProperties(m_property);

ret->EvaluateInIsolation(mosesSP, phraseDict.GetFeaturesToApply());

return ret;
}
Expand All @@ -299,9 +332,36 @@ UINT64 TargetPhrase::ReadOtherInfoFromFile(UINT64 filePos, std::fstream &fileTPC
memUsed += ReadScoresFromFile(fileTPColl);
assert((memUsed + filePos) == (UINT64)fileTPColl.tellg());

// sparse features
memUsed += ReadStringFromFile(fileTPColl, m_sparseFeatures);

// properties
memUsed += ReadStringFromFile(fileTPColl, m_property);

return memUsed;
}

/**
 * Read a length-prefixed string (a UINT64 byte count followed by that many
 * bytes) from the current position of fileTPColl.
 *
 * @param fileTPColl stream positioned at the length prefix.
 * @param outStr receives the string; it is left empty when the stored
 *               length is 0 or the length prefix cannot be read.
 * @return number of bytes accounted for: sizeof(UINT64) plus the string
 *         length that was read.
 */
UINT64 TargetPhrase::ReadStringFromFile(std::fstream &fileTPColl, std::string &outStr)
{
  // Zero-initialised so a failed read can never size an allocation from an
  // indeterminate value.
  UINT64 strSize = 0;
  fileTPColl.read(reinterpret_cast<char*>(&strSize), sizeof(UINT64));
  UINT64 bytesRead = sizeof(UINT64);

  if (!fileTPColl) {
    // The prefix could not be read in full; treat it as an empty string.
    strSize = 0;
  }

  // Always reset the output so a zero-length record does not leave a stale
  // value from an earlier read in place.
  outStr.clear();

  if (strSize) {
    // Owned buffer instead of malloc/free: no leak on early exit and no
    // unchecked NULL from a failed allocation.
    std::vector<char> buffer(strSize);
    fileTPColl.read(&buffer[0], strSize);

    // Assign by explicit length so every stored byte is kept; building the
    // string from a C string would stop at the first embedded NUL.
    outStr.assign(buffer.begin(), buffer.end());

    bytesRead += strSize;
  }

  return bytesRead;
}

UINT64 TargetPhrase::ReadFromFile(std::fstream &fileTP)
{
UINT64 bytesRead = 0;
Expand Down

2 comments on commit e4eb201

@mtresearcher
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The code gets stuck with cblm_max_age=1

One of the errors is on line 39 of DynamicCacheBasedLanguageModel.cpp

for (unsigned int i=0; i < m_maxAge; i++)

should be

for (unsigned int i=0; i <= m_maxAge; i++)

There might be other problems too. I am still investigating and will post an update soon.

@nicolabertoldi
Copy link
Member Author

@nicolabertoldi nicolabertoldi commented on e4eb201 Jun 5, 2015 via email

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.