Skip to content

Commit

Permalink
allow configuration of operation sequence model loading, allow specification of KENLM/OSM loading in experiment.perl / train-model.perl
Browse files Browse the repository at this point in the history
  • Loading branch information
phikoehn committed May 29, 2016
1 parent 227e541 commit 942eb5a
Show file tree
Hide file tree
Showing 7 changed files with 36 additions and 9 deletions.
3 changes: 2 additions & 1 deletion moses/FF/OSM-Feature/KenOSM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@
namespace Moses
{

OSMLM* ConstructOSMLM(const char *file)
OSMLM* ConstructOSMLM(const char *file, util::LoadMethod load_method)
{
lm::ngram::ModelType model_type;
lm::ngram::Config config;
config.load_method = load_method;
if (lm::ngram::RecognizeBinary(file, model_type)) {
switch(model_type) {
case lm::ngram::PROBING:
Expand Down
2 changes: 1 addition & 1 deletion moses/FF/OSM-Feature/KenOSM.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ class KenOSM : public KenOSMBase

typedef KenOSMBase OSMLM;

OSMLM* ConstructOSMLM(const char *file);
OSMLM* ConstructOSMLM(const char *file, util::LoadMethod load_method);


} // namespace
17 changes: 16 additions & 1 deletion moses/FF/OSM-Feature/OpSequenceModel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ OpSequenceModel::OpSequenceModel(const std::string &line)
tFactor = 0;
numFeatures = 5;
// Establish the default load method BEFORE parsing the feature line:
// assigning it after ReadParameters() would overwrite any "load=<method>"
// value handled by SetParameter() (assuming ReadParameters() dispatches
// there), forcing util::READ regardless of configuration.
load_method = util::READ;
ReadParameters();
}

OpSequenceModel::~OpSequenceModel()
Expand All @@ -27,7 +28,7 @@ OpSequenceModel::~OpSequenceModel()
void OpSequenceModel :: readLanguageModel(const char *lmFile)
{
string unkOp = "_TRANS_SLF_";
OSM = ConstructOSMLM(m_lmPath.c_str());
OSM = ConstructOSMLM(m_lmPath.c_str(), load_method);

State startState = OSM->NullContextState();
State endState;
Expand Down Expand Up @@ -248,6 +249,20 @@ void OpSequenceModel::SetParameter(const std::string& key, const std::string& va
sFactor = Scan<int>(value);
} else if (key == "output-factor") {
tFactor = Scan<int>(value);
} else if (key == "load") {
if (value == "lazy") {
load_method = util::LAZY;
} else if (value == "populate_or_lazy") {
load_method = util::POPULATE_OR_LAZY;
} else if (value == "populate_or_read" || value == "populate") {
load_method = util::POPULATE_OR_READ;
} else if (value == "read") {
load_method = util::READ;
} else if (value == "parallel_read") {
load_method = util::PARALLEL_READ;
} else {
UTIL_THROW2("Unknown KenLM load method " << value);
}
} else {
StatefulFeatureFunction::SetParameter(key, value);
}
Expand Down
1 change: 1 addition & 0 deletions moses/FF/OSM-Feature/OpSequenceModel.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class OpSequenceModel : public StatefulFeatureFunction
int sFactor; // Source Factor ...
int tFactor; // Target Factor ...
int numFeatures; // Number of features used ...
util::LoadMethod load_method; // method to load model

OpSequenceModel(const std::string &line);
~OpSequenceModel();
Expand Down
2 changes: 1 addition & 1 deletion scripts/ems/experiment.meta
Original file line number Diff line number Diff line change
Expand Up @@ -827,7 +827,7 @@ create-config
in: sigtest-filter-reordering-table sigtest-filter-phrase-translation-table transliteration-table generation-table-pruned sparse corpus-mml-prefilter=OR=corpus-mml-postfilter=OR=domains osm-model INTERPOLATED-LM:binlm LM:binlm
out: config
ignore-if: use-hiero thot
rerun-on-change: decoding-steps alignment-factors translation-factors reordering-factors generation-factors lexicalized-reordering training-options script decoding-graph-backoff score-settings additional-ini mmsapt no-glue-grammar dont-tune-glue-grammar use-syntax-input-weight-feature
rerun-on-change: decoding-steps alignment-factors translation-factors reordering-factors generation-factors lexicalized-reordering training-options script decoding-graph-backoff score-settings additional-ini mmsapt no-glue-grammar dont-tune-glue-grammar use-syntax-input-weight-feature operation-sequence-model-load-method
default-name: model/moses.ini
error: Unknown option
error: requires an argument
Expand Down
6 changes: 5 additions & 1 deletion scripts/ems/experiment.perl
Original file line number Diff line number Diff line change
Expand Up @@ -2660,12 +2660,16 @@ sub define_training_create_config {

if ($osm) {
my $osm_settings = &get("TRAINING:operation-sequence-model-settings");
if ($osm_settings =~ /-factor *(\S+)/){
if ($osm_settings =~ /-factor *(\S+)/) {
$cmd .= "-osm-model $osm/ -osm-setting $1 ";
}
else {
$cmd .= "-osm-model $osm/operationLM.bin ";
}
my $osm_load_method = &get("TRAINING:operation-sequence-model-load-method");
if (defined($osm_load_method)) {
$cmd .= "-osm-load-method $osm_load_method ";
}
}

if (&get("TRAINING:phrase-orientation")) {
Expand Down
14 changes: 10 additions & 4 deletions scripts/training/train-model.perl
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@
$_CONFIG,
$_OSM,
$_OSM_FACTORS,
$_OSM_LOAD_METHOD,
$_POST_DECODING_TRANSLIT,
$_TRANSLITERATION_PHRASE_TABLE,
$_HIERARCHICAL,
Expand Down Expand Up @@ -238,6 +239,7 @@
'config=s' => \$_CONFIG,
'osm-model=s' => \$_OSM,
'osm-setting=s' => \$_OSM_FACTORS,
'osm-load-method=s' => \$_OSM_LOAD_METHOD,
'post-decoding-translit=s' => \$_POST_DECODING_TRANSLIT,
'transliteration-phrase-table=s' => \$_TRANSLITERATION_PHRASE_TABLE,
'mmsapt' => \$_MMSAPT,
Expand Down Expand Up @@ -2249,6 +2251,8 @@ sub create_ini {

if($_OSM)
{
my $load_method = "";
$load_method = " load=$_OSM_LOAD_METHOD" if defined($_OSM_LOAD_METHOD);
if (defined($_OSM_FACTORS))
{
my $count = 0;
Expand All @@ -2258,11 +2262,11 @@ sub create_ini {
my ($factor_f,$factor_e) = split(/\-/,$factor_val);

if($count == 0){
$feature_spec .= "OpSequenceModel name=OpSequenceModel$count num-features=5 path=". $_OSM . $factor_val . "/operationLM.bin" . " input-factor=". $factor_f . " output-factor=". $factor_e . " support-features=yes \n";
$feature_spec .= "OpSequenceModel$load_method name=OpSequenceModel$count num-features=5 path=". $_OSM . $factor_val . "/operationLM.bin" . " input-factor=". $factor_f . " output-factor=". $factor_e . " support-features=yes \n";
$weight_spec .= "OpSequenceModel$count= 0.08 -0.02 0.02 -0.001 0.03\n";
}
else{
$feature_spec .= "OpSequenceModel name=OpSequenceModel$count num-features=1 path=". $_OSM . $factor_val . "/operationLM.bin" . " input-factor=". $factor_f . " output-factor=". $factor_e . " support-features=no \n";
$feature_spec .= "OpSequenceModel$load_method name=OpSequenceModel$count num-features=1 path=". $_OSM . $factor_val . "/operationLM.bin" . " input-factor=". $factor_f . " output-factor=". $factor_e . " support-features=no \n";
$weight_spec .= "OpSequenceModel$count= 0.08 \n";

}
Expand All @@ -2271,7 +2275,7 @@ sub create_ini {
}
else
{
$feature_spec .= "OpSequenceModel name=OpSequenceModel0 num-features=5 path=". $_OSM . " \n";
$feature_spec .= "OpSequenceModel$load_method name=OpSequenceModel0 num-features=5 path=". $_OSM . " \n";
$weight_spec .= "OpSequenceModel0= 0.08 -0.02 0.02 -0.001 0.03\n";
}
}
Expand All @@ -2292,7 +2296,9 @@ sub create_ini {
}
$type = "KENLM" unless defined $type; # default to KENLM if no type given

if ($type =~ /^\d+$/) {
if ($type =~ /^8-(.+)/) {
$type = "KENLM load=$1";
} elsif ($type =~ /^\d+$/) {
# backwards compatibility if the type is given not as string but as a number
if ($type == 0) {
$type = "SRILM";
Expand Down

0 comments on commit 942eb5a

Please sign in to comment.