Permalink
Browse files

updated README and examples

  • Loading branch information...
mjpost committed Jun 30, 2012
1 parent 9b8b6e4 commit 983af2db1af8dc9608424b98a5e39aa7f936d9e9
View
16 README
@@ -18,32 +18,31 @@ The basic decoder invocation is:
cat SOURCE | JOSHUA -c CONFIG > OUTPUT
-An example of such a model can
-be found in the example/ directory. To run this example, first setup some
-environment variables:
+Example invocations can be found within the examples/ directory. To run them,
+first set up some environment variables:
export JOSHUA=/path/to/joshua
export LC_ALL=en_US.UTF-8
export LANG=en_US.UTF-8
Then type:
- cat example/example.test.in | $JOSHUA/joshua-decoder -c example/example.config.kenlm
+ cat examples/example/test.in | $JOSHUA/joshua-decoder -c examples/example/joshua.config
The decoder will load the language model and translation models defined
in the configuration file, and will then decode the five sentences in the
example file.
You can enable multithreaded decoding with the -threads N flag:
- cat example/example.test.in | $JOSHUA/joshua-decoder -c example/example.config.kenlm -threads 5
+ cat examples/example/test.in | $JOSHUA/joshua-decoder -c examples/example/joshua.config -threads 5
The configuration file defines many additional parameters, all of which can be
overridden on the command line by using the format -PARAMETER value. For
example, to output the top 10 hypotheses instead of just the top 1 specified in
-the configuration file, use -top_n N:
+the configuration file, use -top-n N:
- cat example/example.test.in | $JOSHUA/joshua-decoder -c example/example.config.kenlm -top_n 10
+ cat examples/example/test.in | $JOSHUA/joshua-decoder -c examples/example/joshua.config -top-n 10
Parameters, whether in the configuration file or on the command line, are
converted to a canonical internal representation that ignores hyphens,
@@ -55,4 +54,5 @@ equivalent:
and so on. For an example of parameters, see the Joshua configuration file
template in $JOSHUA/scripts/training/templates/mert/joshua.config or the online
-documentation at joshua-decoder.org/4.0/decoder.html .
+documentation at joshua-decoder.org/4.0/decoder.html. There is a wealth of
+information in the online documentation.
View
@@ -1,5 +1,5 @@
The examples in this directory are in various states of functionality.
If you want to see working code and examples of usage, please consult the
-online documentation (joshua-decoder.org/userdocs). You should also find
+online documentation at joshua-decoder.org. You should also find
some useful examples in the pipeline script (the recommended way to use Joshua).
@@ -1,5 +0,0 @@
-#!/bin/bash
-
-java -classpath $CLASSPATH:./bin \
- -Xmx1000m -Xms1000m \
- joshua.decoder.JoshuaDecoder example/example.config.bloomfilterlm example/example.test.in example/example.nbest.bloomfilterlm.out
@@ -1,5 +0,0 @@
-#!/bin/bash
-
-java -classpath $CLASSPATH:./bin \
- -Xmx1000m -Xms1000m \
- joshua.decoder.JoshuaDecoder example/example.config.javalm example/example.test.in example/example.nbest.javalm.out
@@ -1,8 +0,0 @@
-#!/bin/bash
-
-java -classpath ${CLASSPATH}:${JOSHUA}/bin \
- -Djava.library.path=${JOSHUA}/lib \
- -Dfile.encoding=utf8 \
- -Djava.util.logging.config.file=${JOSHUA}/logging.properties \
- -Xmx1000m -Xms1000m \
- joshua.decoder.JoshuaDecoder example/example.config.kenlm example/example.test.in example/example.nbest.srilm.out
Binary file not shown.
@@ -1,74 +0,0 @@
-lm_file=example/example.bloomfilter.lm.gz
-
-tm_file=example/example.hiero.tm.gz
-tm_format=hiero
-
-glue_file=grammars/hiero.glue
-glue_format=hiero
-
-#lm config
-use_srilm=false
-use_bloomfilter_lm=true
-lm_ceiling_cost=100
-use_left_euqivalent_state=false
-use_right_euqivalent_state=false
-order=3
-
-
-#tm config
-span_limit=10
-phrase_owner=pt
-mono_owner=mono
-begin_mono_owner=begin_mono
-default_non_terminal=X
-goalSymbol=S
-
-#pruning config
-fuzz1=0.1
-fuzz2=0.1
-max_n_items=30
-relative_threshold=10.0
-max_n_rules=50
-rule_relative_threshold=10.0
-
-#nbest config
-use_unique_nbest=true
-use_tree_nbest=false
-add_combined_cost=true
-top_n=300
-
-
-#remoter lm server config,we should first prepare remote_symbol_tbl before starting any jobs
-use_remote_lm_server=false
-remote_symbol_tbl=./voc.remote.sym
-num_remote_lm_servers=4
-f_remote_server_list=./remote.lm.server.list
-remote_lm_server_port=9000
-
-
-#parallel deocoder: it cannot be used together with remote lm
-num_parallel_decoders=1
-parallel_files_prefix=.
-
-#disk hg
-save_disk_hg=false
-
-###### model weights
-#lm order weight
-lm 1.000000
-
-#phrasemodel owner column(0-indexed) weight
-phrasemodel pt 0 1.066893
-phrasemodel pt 1 0.752247
-phrasemodel pt 2 0.589793
-
-#arityphrasepenalty owner start_arity end_arity weight
-#arityphrasepenalty pt 0 0 1.0
-#arityphrasepenalty pt 1 2 -1.0
-
-#phrasemodel mono 0 0.5
-
-#wordpenalty weight
-wordpenalty -2.844814
-#latticecost 1.0
-
@@ -1,73 +0,0 @@
-lm_file=example/example.trigram.lm.gz
-
-tm_file=example/example.hiero.tm.gz
-tm_format=hiero
-
-glue_file=grammars/hiero.glue
-glue_format=hiero
-
-#lm config
-use_srilm=false
-lm_ceiling_cost=100
-use_left_equivalent_state=false
-use_right_equivalent_state=false
-order=3
-
-
-#tm config
-span_limit=10
-phrase_owner=pt
-mono_owner=mono
-begin_mono_owner=begin_mono
-default_non_terminal=X
-goalSymbol=S
-
-#pruning config
-fuzz1=0.1
-fuzz2=0.1
-max_n_items=30
-relative_threshold=10.0
-max_n_rules=50
-rule_relative_threshold=10.0
-
-#nbest config
-use_unique_nbest=true
-use_tree_nbest=false
-add_combined_cost=true
-top_n=300
-
-
-#remoter lm server config,we should first prepare remote_symbol_tbl before starting any jobs
-use_remote_lm_server=false
-remote_symbol_tbl=./voc.remote.sym
-num_remote_lm_servers=4
-f_remote_server_list=./remote.lm.server.list
-remote_lm_server_port=9000
-
-
-#parallel deocoder: it cannot be used together with remote lm
-num_parallel_decoders=1
-parallel_files_prefix=.
-
-#disk hg
-save_disk_hg=false
-
-###### model weights
-#lm order weight
-lm 1.000000
-
-#phrasemodel owner column(0-indexed) weight
-phrasemodel pt 0 1.066893
-phrasemodel pt 1 0.752247
-phrasemodel pt 2 0.589793
-
-#arityphrasepenalty owner start_arity end_arity weight
-#arityphrasepenalty pt 0 0 1.0
-#arityphrasepenalty pt 1 2 -1.0
-
-#phrasemodel mono 0 0.5
-
-#wordpenalty weight
-wordpenalty -2.844814
-#latticecost 1.0
-
@@ -1,70 +0,0 @@
-lm_file=example/example.trigram.lm.gz
-
-tm_file=example/example.hiero.tm.gz
-tm_format=hiero
-
-glue_file=../grammars/hiero.glue
-glue_format=hiero
-
-#lm config
-use_srilm=true
-lm_ceiling_cost=100
-use_left_equivalent_state=false
-use_right_equivalent_state=false
-order=3
-
-
-#tm config
-span_limit=10
-phrase_owner=pt
-mono_owner=mono
-begin_mono_owner=begin_mono
-default_non_terminal=X
-goalSymbol=S
-
-#pruning config
-fuzz1=0.1
-fuzz2=0.1
-max_n_items=30
-relative_threshold=10.0
-max_n_rules=50
-rule_relative_threshold=10.0
-
-#nbest config
-use_unique_nbest=true
-use_tree_nbest=false
-add_combined_cost=true
-top_n=300
-
-
-#remoter lm server config,we should first prepare remote_symbol_tbl before starting any jobs
-use_remote_lm_server=false
-remote_symbol_tbl=./voc.remote.sym
-num_remote_lm_servers=4
-f_remote_server_list=./remote.lm.server.list
-remote_lm_server_port=9000
-
-
-#parallel deocoder: it cannot be used together with remote lm
-num_parallel_decoders=1
-parallel_files_prefix=/tmp/
-
-
-###### model weights
-#lm order weight
-lm 1.000000
-
-#phrasemodel owner column(0-indexed) weight
-phrasemodel pt 0 1.066893
-phrasemodel pt 1 0.752247
-phrasemodel pt 2 0.589793
-
-#arityphrasepenalty owner start_arity end_arity weight
-#arityphrasepenalty pt 0 0 1.0
-#arityphrasepenalty pt 1 2 -1.0
-
-#phrasemodel mono 0 0.5
-
-#wordpenalty weight
-wordpenalty -2.844814
-
Binary file not shown.
Binary file not shown.
File renamed without changes.
@@ -0,0 +1,45 @@
+tm_file=examples/example/hiero.tm.gz
+tm_format=thrax
+
+glue_file=examples/grammars/hiero.glue
+glue_format=thrax
+
+#lm config
+lm = kenlm 3 0 0 100 examples/example/trigram.lm.gz
+
+#tm config
+span_limit=10
+phrase_owner=pt
+default_non_terminal=X
+goalSymbol=S
+
+#pruning config
+pop-limit = 10
+
+#nbest config
+use_unique_nbest=true
+use_tree_nbest=false
+add_combined_cost=true
+top_n=1
+
+threads = 1
+
+
+###### model weights
+#lm order weight
+lm 0 1.000000
+
+#phrasemodel owner column(0-indexed) weight
+phrasemodel pt 0 1.066893
+phrasemodel pt 1 0.752247
+phrasemodel pt 2 0.589793
+
+#arityphrasepenalty owner start_arity end_arity weight
+#arityphrasepenalty pt 0 0 1.0
+#arityphrasepenalty pt 1 2 -1.0
+
+#phrasemodel mono 0 0.5
+
+#wordpenalty weight
+wordpenalty -2.844814
+
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
View
@@ -1 +1,3 @@
-Someone needs to write this README file.
+Someone needs to write this README file.
+
+That someone is Jonny Weese (jonny@cs.jhu.edu).

0 comments on commit 983af2d

Please sign in to comment.