Skip to content

Commit

Permalink
fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
phikoehn committed Jul 29, 2015
1 parent 4992f62 commit ae9cd14
Showing 1 changed file with 77 additions and 77 deletions.
154 changes: 77 additions & 77 deletions scripts/Transliteration/post-decoding-transliteration.pl
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

my $___FACTOR_DELIMITER = "|";

my ($MOSES_SRC_DIR,$TRANSLIT_MODEL,$EVAL_DIR,$OUTPUT_FILE,$OUTPUT_FILE_NAME,$OOV_FILE, $OOV_FILE_NAME, $EXTERNAL_BIN_DIR, $LM_FILE, $INPUT_EXTENSION, $OUTPUT_EXTENSION, $INPUT_FILE,$VERBOSE,$DECODER);
my ($MOSES_SRC_DIR,$TRANSLIT_MODEL,$EVAL_DIR,$OUTPUT_FILE,$OUTPUT_FILE_NAME,$OOV_FILE, $OOV_FILE_NAME, $EXTERNAL_BIN_DIR, $LM_FILE, $INPUT_EXTENSION, $OUTPUT_EXTENSION, $INPUT_FILE,$VERBOSE,$DECODER,$TMP_DIR);
die("ERROR: wrong syntax when invoking postDecodingTransliteration.perl")
unless &GetOptions('moses-src-dir=s' => \$MOSES_SRC_DIR,
'external-bin-dir=s' => \$EXTERNAL_BIN_DIR,
Expand All @@ -27,6 +27,7 @@
'output-extension=s' => \$OUTPUT_EXTENSION,
'decoder=s' => \$DECODER,
'oov-file=s' => \$OOV_FILE,
'tmp-dir=s' => \$TMP_DIR,
'input-file=s' => \$INPUT_FILE,
'output-file=s' => \$OUTPUT_FILE,
'verbose' => \$VERBOSE,
Expand Down Expand Up @@ -60,17 +61,19 @@
$OUTPUT_FILE_NAME = basename ($OUTPUT_FILE);
$OOV_FILE_NAME = basename ($OOV_FILE);

`mkdir $TRANSLIT_MODEL/evaluation`;
`cp $OOV_FILE $TRANSLIT_MODEL/evaluation/`;
my $translitFile = $TRANSLIT_MODEL . "/evaluation/" . $OOV_FILE_NAME;
$TMP_DIR = $OUTPUT_FILE.".tmp" unless defined($TMP_DIR);

`mkdir -p $TMP_DIR/transliteration`;
`cp $OOV_FILE $TMP_DIR/transliteration`;
my $translitFile = "$TMP_DIR/transliteration/$OOV_FILE_NAME";

print "Preparing for Transliteration\n";
prepare_for_transliteration ($OOV_FILE, $translitFile);
&prepare_for_transliteration ($OOV_FILE, $translitFile);
print "Run Transliteration\n";
run_transliteration ($MOSES_SRC_DIR , $EXTERNAL_BIN_DIR , $TRANSLIT_MODEL , $OOV_FILE_NAME);
&run_transliteration ($MOSES_SRC_DIR , $EXTERNAL_BIN_DIR , $TRANSLIT_MODEL , $OOV_FILE_NAME);
print "Pick Best Transliteration\n";
form_corpus ($translitFile , $translitFile.".op.nBest" , $EVAL_DIR);
run_decoder($MOSES_SRC_DIR, $EXTERNAL_BIN_DIR, $LM_FILE);
&form_corpus ($translitFile , $translitFile.".op.nBest" , $EVAL_DIR);
&run_decoder($MOSES_SRC_DIR, $EXTERNAL_BIN_DIR, $LM_FILE);

################### Read the UNK word file and prepare for Transliteration ###############################

Expand Down Expand Up @@ -132,16 +135,18 @@ sub run_transliteration
my $EXTERNAL_BIN_DIR = $list[1];
my $TRANSLIT_MODEL = $list[2];
my $eval_file = $list[3];
print "run_transliteration($MOSES_SRC,$EXTERNAL_BIN_DIR,$TRANSLIT_MODEL,$eval_file)\n";

`touch $TRANSLIT_MODEL/evaluation/$eval_file.moses.table.ini`;
`touch $TMP_DIR/transliteration/$eval_file.moses.table.ini`;

print "Filter Table\n";
print "Filter Table... ".`date`;

<<<<<<< Updated upstream
`$MOSES_SRC/scripts/training/train-model.perl \\
-mgiza -mgiza-cpus 10 -dont-zip -first-step 9 \\
-external-bin-dir $EXTERNAL_BIN_DIR -f $INPUT_EXTENSION \\
-e $OUTPUT_EXTENSION -alignment grow-diag-final-and -parts 5 \\
-score-options '--KneserNey' \\
-first-step 9 \\
-external-bin-dir $EXTERNAL_BIN_DIR \\
-f $INPUT_EXTENSION \\
-e $OUTPUT_EXTENSION \\
-phrase-translation-table $TRANSLIT_MODEL/model/phrase-table \\
-config $TRANSLIT_MODEL/evaluation/$eval_file.moses.table.ini \\
-lm 0:3:$TRANSLIT_MODEL/evaluation/$eval_file.moses.table.ini:8`;
Expand All @@ -151,11 +156,21 @@ sub run_transliteration
$TRANSLIT_MODEL/evaluation/$eval_file.moses.table.ini \\
$TRANSLIT_MODEL/evaluation/$eval_file \\
-Binarizer "$MOSES_SRC/bin/CreateOnDiskPt 1 1 4 100 2"`;
=======
my $cmd = "$MOSES_SRC/scripts/training/train-model.perl -dont-zip -first-step 9 -external-bin-dir $EXTERNAL_BIN_DIR -f $INPUT_EXTENSION -e $OUTPUT_EXTENSION -phrase-translation-table $TRANSLIT_MODEL/model/phrase-table -config $TMP_DIR/transliteration/$eval_file.moses.table.ini -lm 0:3:$TMP_DIR/transliteration/$eval_file.moses.table.ini:8";
print $cmd."\n";
`$cmd`;
`rm $TRANSLIT_MODEL/evaluation/$eval_file.moses.table.ini`;
$cmd = "$MOSES_SRC/scripts/training/filter-model-given-input.pl $TMP_DIR/transliteration/$eval_file.filtered $TMP_DIR/transliteration/$eval_file.moses.table.ini $TMP_DIR/transliteration/$eval_file -Binarizer \"$MOSES_SRC/bin/CreateOnDiskPt 1 1 4 100 2\"";
print $cmd."\n";
`$cmd`;
>>>>>>> Stashed changes
`rm $TMP_DIR/transliteration/$eval_file.moses.table.ini`;
print "Apply Filter\n";
<<<<<<< Updated upstream
`$MOSES_SRC/scripts/ems/support/substitute-filtered-tables-and-weights.perl \\
$TRANSLIT_MODEL/evaluation/$eval_file.filtered/moses.ini \\
$TRANSLIT_MODEL/model/moses.ini \\
Expand All @@ -170,7 +185,16 @@ sub run_transliteration
distinct -f $TRANSLIT_MODEL/evaluation/$eval_file.filtered.ini \\
< $TRANSLIT_MODEL/evaluation/$eval_file \\
> $TRANSLIT_MODEL/evaluation/$eval_file.op $drop_stderr`;

=======
$cmd = "$MOSES_SRC/scripts/ems/support/substitute-filtered-tables-and-weights.perl $TMP_DIR/transliteration/$eval_file.filtered/moses.ini $TRANSLIT_MODEL/model/moses.ini $TRANSLIT_MODEL/tuning/moses.tuned.ini $TMP_DIR/transliteration/$eval_file.filtered.ini";
print $cmd."\n";
`$cmd`;
my $drop_stderr = $VERBOSE ? "" : " 2>/dev/null";
$cmd = "$DECODER -search-algorithm 1 -cube-pruning-pop-limit 5000 -s 5000 -threads 16 -drop-unknown -distortion-limit 0 -n-best-list $TMP_DIR/transliteration/$eval_file.op.nBest 1000 distinct -f $TMP_DIR/transliteration/$eval_file.filtered.ini < $TMP_DIR/transliteration/$eval_file > $TMP_DIR/transliteration/$eval_file.op $drop_stderr";
print $cmd."\n";
`$cmd`;
>>>>>>> Stashed changes
}
################### Read the output of Transliteration Model and Form Corpus ###############################
Expand All @@ -191,10 +215,10 @@ sub form_corpus
my @UNK;
my %vocab;
`mkdir -p $EVAL_DIR/Transliteration-Module/$OUTPUT_FILE_NAME/model`;
`mkdir -p $TMP_DIR/retranslation/model`;
my $antLog = exp(0.2);
my $phraseTable = $EVAL_DIR . "/Transliteration-Module/$OUTPUT_FILE_NAME/model/phrase-table";
my $phraseTable = "$TMP_DIR/retranslation/model/phrase-table";
open MYFILE, "<:encoding(UTF-8)", $inp_file or die "Can't open $inp_file: $!\n";
open PT, ">:encoding(UTF-8)", $phraseTable or die "Can't open $phraseTable: $!\n";
Expand Down Expand Up @@ -301,66 +325,42 @@ sub run_decoder
my @list = @_;
my $MOSES_SRC = $list[0];
my $EXTERNAL_BIN_DIR = $list[1];
my $corpus_dir = $EVAL_DIR . "/Transliteration-Module/$OUTPUT_FILE_NAME";
my $LM_FILE = $list[2];
my @words;
my $final_file = $EVAL_DIR . "/$OUTPUT_FILE_NAME";

my $find = ".cleaned.";
my $replace = ".transliterated.";
if ($final_file !~ /$find/) {
$find = ".output.";
}
$final_file =~ s/$find/$replace/g;

`mkdir $corpus_dir/evaluation`;

`$MOSES_SRC/scripts/training/train-model.perl \\
-mgiza -mgiza-cpus 10 -dont-zip -first-step 9 \\
-external-bin-dir $EXTERNAL_BIN_DIR -f $INPUT_EXTENSION \\
-e $OUTPUT_EXTENSION -alignment grow-diag-final-and -parts 5 \\
-lmodel-oov-feature "yes" -post-decoding-translit "yes" \\
-phrase-translation-table $corpus_dir/model/phrase-table \\
-config $corpus_dir/model/moses.ini -lm 0:5:$LM_FILE:8`;

`touch $corpus_dir/evaluation/$OUTPUT_FILE_NAME.moses.table.ini`;

`$MOSES_SRC/scripts/training/train-model.perl \\
-mgiza -mgiza-cpus 10 -dont-zip -first-step 9 \\
-external-bin-dir $EXTERNAL_BIN_DIR -f $INPUT_EXTENSION \\
-e $OUTPUT_EXTENSION -alignment grow-diag-final-and -parts 5 \\
-lmodel-oov-feature "yes" -post-decoding-translit "yes" \\
-phrase-translation-table $corpus_dir/model/phrase-table \\
-config $corpus_dir/evaluation/$OUTPUT_FILE_NAME.moses.table.ini \\
-lm 0:3:$corpus_dir/evaluation/$OUTPUT_FILE_NAME.moses.table.ini:8`;
`$MOSES_SRC/scripts/training/filter-model-given-input.pl \\
$corpus_dir/evaluation/filtered \\
$corpus_dir/evaluation/$OUTPUT_FILE_NAME.moses.table.ini \\
$INPUT_FILE -Binarizer "$MOSES_SRC/bin/CreateOnDiskPt \\
1 1 4 100 2"`;

`rm $corpus_dir/evaluation/$OUTPUT_FILE_NAME.moses.table.ini`;

`$MOSES_SRC/scripts/ems/support/substitute-filtered-tables.perl \\
$corpus_dir/evaluation/filtered/moses.ini \\
< $corpus_dir/model/moses.ini \\
> $corpus_dir/evaluation/moses.filtered.ini`;

my $drop_stderr = $VERBOSE ? "" : " 2>/dev/null";
`$DECODER \\
-search-algorithm 1 -cube-pruning-pop-limit 5000 -s 5000 \\
-threads 16 -feature-overwrite 'TranslationModel0 table-limit=100' \\
-max-trans-opt-per-coverage 100 \\
-f $corpus_dir/evaluation/moses.filtered.ini -distortion-limit 0 \\
< $INPUT_FILE \\
> $OUTPUT_FILE $drop_stderr`;

print "$DECODER \\
-search-algorithm 1 -cube-pruning-pop-limit 5000 -s 5000 \\
-threads 16 -feature-overwrite 'TranslationModel0 table-limit=100' \\
-max-trans-opt-per-coverage 100 \\
-f $corpus_dir/evaluation/moses.filtered.ini -distortion-limit 0 \\
< $INPUT_FILE \\
> $OUTPUT_FILE $drop_stderr\n";
`mkdir -p $TMP_DIR/retranslation/evaluation`;
print "Creating config file... ".`date`;
my $cmd = "$MOSES_SRC/scripts/training/train-model.perl "
."-mgiza -mgiza-cpus 10 -dont-zip -first-step 9 "
."-external-bin-dir $EXTERNAL_BIN_DIR -f $INPUT_EXTENSION "
."-e $OUTPUT_EXTENSION -alignment grow-diag-final-and -parts 5 "
."-lmodel-oov-feature \"yes\" -post-decoding-translit \"yes\" "
."-phrase-translation-table $TMP_DIR/retranslation/model/phrase-table "
."-config $TMP_DIR/retranslation/model/moses.ini -lm 0:5:$LM_FILE:8";
print $cmd."\n";
`$cmd`;
print "Filtering transliteration phrase table... ".`date`;
$cmd = "$MOSES_SRC/scripts/training/filter-model-given-input.pl "
."$TMP_DIR/retranslation/filtered "
."$TMP_DIR/retranslation/model/moses.ini "
."$INPUT_FILE -Binarizer \"$MOSES_SRC/bin/CreateOnDiskPt "
."1 1 4 100 2\"";
print $cmd."\n";
`$cmd`;
print "Retranslating... ".`date`;
my $drop_stderr = $VERBOSE ? "" : " 2>/dev/null";
$cmd = "$DECODER "
."-search-algorithm 1 -cube-pruning-pop-limit 5000 -s 5000 "
."-threads 16 -feature-overwrite 'TranslationModel0 table-limit=100' "
."-max-trans-opt-per-coverage 100 "
."-f $TMP_DIR/retranslation/filtered/moses.ini -distortion-limit 0 "
."< $INPUT_FILE "
."> $OUTPUT_FILE $drop_stderr";
print $cmd."\n";
`$cmd`;
print "Done. ".`date`;
}

0 comments on commit ae9cd14

Please sign in to comment.