Updated example config files.

mckinziebrandon · May 17, 2017 · commit 171182f
1 parent 77c7f8a
Show file tree

Hide file tree

Showing 4 changed files with 26 additions and 37 deletions.
diff --git a/configs/cornell_basic.yml b/configs/cornell_basic.yml
diff --git a/configs/example_cornell.yml b/configs/example_cornell.yml
@@ -1,16 +1,13 @@
 model: DynamicBot
 dataset: Cornell
 model_params:
-    attention_size: 256  # (optional even with AttentionDecoder)
-    batch_size: 128
-    base_cell: GRUCell
-    encoder.class: BidirectionalEncoder
+    base_cell: LSTMCell
+    num_layers: 2
+    attention_mechanism: LuongAttention
     decoder.class: AttentionDecoder
-    num_layers: 1
-    ckpt_dir: out/cornell/bi_attn
-    steps_per_ckpt: 250
+    encoder.class: BidirectionalEncoder
+    ckpt_dir: out/cornell
 dataset_params:
     data_dir: /home/brandon/Datasets/cornell # The only truly 'mandatory' parameter.
     vocab_size: 52000 # Approximately the true number of unique words in the dataset.
     max_seq_len: 20
-    optimize_params: True
diff --git a/configs/example_reddit.yml b/configs/example_reddit.yml
@@ -1,16 +1,15 @@
-dataset: data.Reddit
-dataset_params:
-  config_path: configs/example_reddit.yml
-  data_dir: /home/brandon/Datasets/reddit
-  max_seq_len: 25
-  vocab_size: 80000
 model: DynamicBot
+dataset: Reddit
 model_params:
   base_cell: GRUCell
-  batch_size: 64
-  ckpt_dir: out/reddit/talkativeAttention
-  decoder.class: AttentionDecoder
+  batch_size: 128
   embed_size: 128
   num_layers: 1
   reset_model: true
   steps_per_ckpt: 200
+  ckpt_dir: out/reddit/basicReddit
+dataset_params:
+  data_dir: /home/brandon/Datasets/reddit
+  max_seq_len: 15
+  vocab_size: 80000  # HUGE dataset = huge vocabulary.
+
diff --git a/configs/example_ubuntu.yml b/configs/example_ubuntu.yml
@@ -0,0 +1,13 @@
+model: DynamicBot
+dataset: Ubuntu
+model_params:
+  base_cell: GRUCell
+  ckpt_dir: out/ubuntu
+  decoder.class: BasicDecoder
+  encoder.class: BasicEncoder
+  num_layers: 2
+  state_size: 512
+dataset_params:
+  data_dir: /home/brandon/Datasets/ubuntu
+  vocab_size: 60000  # Should probably be higher. Ubuntu is noisy.
+  max_seq_len: 12  # Any longer, and output quality is a challenge.