
fix Word-level Pooling to be in log scale in LinguisticEncoder
keonlee9420 committed Feb 9, 2022
1 parent 7405cd4 commit 0dd9170
Showing 2 changed files with 5 additions and 5 deletions.
6 changes: 3 additions & 3 deletions config/LJSpeech/train.yaml
@@ -4,9 +4,9 @@ dist_config:
   dist_url: "tcp://localhost:80000"
   world_size: 1
 path:
-  ckpt_path: "./output/ckpt/LJSpeech"
-  log_path: "./output/log/LJSpeech"
-  result_path: "./output/result/LJSpeech"
+  ckpt_path: "./output/ckpt/LJSpeech_fixing"
+  log_path: "./output/log/LJSpeech_fixing"
+  result_path: "./output/result/LJSpeech_fixing"
 optimizer:
   batch_size: 64
   betas: [0.9, 0.98]
4 changes: 2 additions & 2 deletions model/linguistic_encoder.py
@@ -185,9 +185,9 @@ def forward(
         # Phoneme-level Duration Prediction
         log_duration_p_prediction = self.duration_predictor(enc_p_out, src_p_mask)

-        # Word-level Pooling
+        # Word-level Pooling (in log scale)
         log_duration_w_prediction = word_level_pooling(
-            log_duration_p_prediction.unsqueeze(-1), src_p_len, word_boundary, src_w_len, reduce="sum").squeeze(-1)
+            log_duration_p_prediction.exp().unsqueeze(-1), src_p_len, word_boundary, src_w_len, reduce="sum").log().squeeze(-1)

         x = enc_w_out
         if duration_target is not None:
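The fix matters because the duration predictor outputs durations in log scale: summing log-durations over a word's phonemes would multiply the real durations instead of adding them, so the pooling must go exp → sum → log. A minimal standalone sketch of that pattern (plain Python with a hypothetical `word_level_pooling_log` helper; the repository's actual `word_level_pooling` operates on padded batched tensors):

```python
import math

# Per-phoneme durations (in frames) for two words of two phonemes each,
# stored in log scale as the duration predictor would emit them.
log_duration_p = [math.log(d) for d in [2.0, 3.0, 4.0, 1.0]]
word_boundary = [0, 0, 1, 1]  # maps each phoneme to its word index

def word_level_pooling_log(log_durs, boundary, n_words):
    """Pool log-scale durations to word level: exp -> sum per word -> log."""
    sums = [0.0] * n_words
    for ld, w in zip(log_durs, boundary):
        sums[w] += math.exp(ld)  # sum real durations, not their logs
    return [math.log(s) for s in sums]

log_duration_w = word_level_pooling_log(log_duration_p, word_boundary, 2)
print(log_duration_w)  # ≈ [1.6094, 1.6094], i.e. log(2+3) and log(4+1)
```

Summing the log values directly would instead give log(2·3) and log(4·1), which is the bug this commit removes.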
