nnsvs · r9y9 · Nov 26, 2020 · Nov 15, 2020 · Nov 15, 2020 · Nov 15, 2020
diff --git a/egs/nit-song070/svs-world-conv-mdn/conf/train/.gitkeep b/egs/nit-song070/svs-world-conv-mdn/conf/train/.gitkeep
diff --git a/egs/nit-song070/svs-world-conv-mdn/conf/train/acoustic/data/myconfig.yaml b/egs/nit-song070/svs-world-conv-mdn/conf/train/acoustic/data/myconfig.yaml
@@ -0,0 +1,16 @@
+# @package _group_
+# Dataset locations and data-loader settings for acoustic model training.
+# training set (NOTE(review): directories are null here; presumably overridden per run - confirm)
+train_no_dev:
+  in_dir: null
+  out_dir: null
+
+# development set
+dev:
+  in_dir: null
+  out_dir: null
+
+# data loader
+num_workers: 2
+batch_size: 2
+pin_memory: true
diff --git a/egs/nit-song070/svs-world-conv-mdn/conf/train/acoustic/model/acoustic_baseline.yaml b/egs/nit-song070/svs-world-conv-mdn/conf/train/acoustic/model/acoustic_baseline.yaml
@@ -0,0 +1,17 @@
+# @package _group_
+# Output streams, in order: (mgc, lf0, vuv, bap)
+stream_sizes: [180, 3, 1, 15]
+has_dynamic_features: [true, true, false, true]
+num_windows: 3
+# If null (None), automatically set based on stream sizes
+stream_weights: null
+
+netG:
+  _target_: nnsvs.model.Conv1dResnetMDN
+  in_dim: 299
+  out_dim: 199  # equals the sum of stream_sizes (180 + 3 + 1 + 15)
+  hidden_dim: 128
+  num_layers: 6
+  dropout: 0.1
+  num_gaussians: 4
+  dim_wise: false  # the only value that differs from acoustic_mdn.yaml, which sets true
diff --git a/egs/nit-song070/svs-world-conv-mdn/conf/train/acoustic/model/acoustic_mdn.yaml b/egs/nit-song070/svs-world-conv-mdn/conf/train/acoustic/model/acoustic_mdn.yaml
@@ -0,0 +1,17 @@
+# @package _group_
+# Output streams, in order: (mgc, lf0, vuv, bap)
+stream_sizes: [180, 3, 1, 15]
+has_dynamic_features: [true, true, false, true]
+num_windows: 3
+# If null (None), automatically set based on stream sizes
+stream_weights: null
+
+netG:
+  _target_: nnsvs.model.Conv1dResnetMDN
+  in_dim: 299
+  out_dim: 199  # equals the sum of stream_sizes (180 + 3 + 1 + 15)
+  hidden_dim: 128
+  num_layers: 6
+  dropout: 0.1
+  num_gaussians: 4
+  dim_wise: true  # the only value that differs from acoustic_baseline.yaml, which sets false
diff --git a/egs/nit-song070/svs-world-conv-mdn/conf/train/acoustic/train/myconfig.yaml b/egs/nit-song070/svs-world-conv-mdn/conf/train/acoustic/train/myconfig.yaml
@@ -0,0 +1,29 @@
+# @package _group_
+# Training settings for the acoustic model: schedule, optimizer, resume and cuDNN flags.
+out_dir: exp
+nepochs: 200
+checkpoint_epoch_interval: 20
+
+stream_wise_loss: false
+use_detect_anomaly: true
+
+optim:
+  optimizer:
+    name: Adam
+    params:
+      lr: 0.001
+      betas: [0.9, 0.999]
+      weight_decay: 0.0
+  lr_scheduler:
+    name: StepLR
+    params:
+      step_size: 50
+      gamma: 0.5
+
+resume:
+  checkpoint: null  # no checkpoint selected (NOTE(review): presumably a path when resuming - confirm)
+  load_optimizer: false
+
+cudnn:
+  benchmark: false
+  deterministic: true
diff --git a/egs/nit-song070/svs-world-conv-mdn/conf/train/duration/data/myconfig.yaml b/egs/nit-song070/svs-world-conv-mdn/conf/train/duration/data/myconfig.yaml
@@ -0,0 +1,16 @@
+# @package _group_
+# Dataset locations and data-loader settings for duration model training.
+# training set (NOTE(review): directories are null here; presumably overridden per run - confirm)
+train_no_dev:
+  in_dir: null
+  out_dir: null
+
+# development set
+dev:
+  in_dir: null
+  out_dir: null
+
+# data loader
+num_workers: 2
+batch_size: 2
+pin_memory: true
diff --git a/egs/nit-song070/svs-world-conv-mdn/conf/train/duration/model/duration_mdn.yaml b/egs/nit-song070/svs-world-conv-mdn/conf/train/duration/model/duration_mdn.yaml
@@ -0,0 +1,14 @@
+# @package _group_
+# Duration model definition: one output stream of size 1, network class nnsvs.model.MDN.
+stream_sizes: [1]
+has_dynamic_features: [false]
+stream_weights: [1]
+
+netG:
+  _target_: nnsvs.model.MDN
+  in_dim: 295
+  out_dim: 1  # matches the single entry of stream_sizes
+  hidden_dim: 1024
+  num_layers: 4
+  dropout: 0.5
+  num_gaussians: 4
diff --git a/egs/nit-song070/svs-world-conv-mdn/conf/train/duration/train/myconfig.yaml b/egs/nit-song070/svs-world-conv-mdn/conf/train/duration/train/myconfig.yaml
@@ -0,0 +1,29 @@
+# @package _group_
+# Training settings for the duration model: schedule, optimizer, resume and cuDNN flags.
+out_dir: exp
+nepochs: 50
+checkpoint_epoch_interval: 20
+
+stream_wise_loss: false
+use_detect_anomaly: true
+
+optim:
+  optimizer:
+    name: Adam
+    params:
+      lr: 0.001
+      betas: [0.9, 0.999]
+      weight_decay: 0.0
+  lr_scheduler:
+    name: StepLR
+    params:
+      step_size: 20
+      gamma: 0.5
+
+resume:
+  checkpoint: null  # no checkpoint selected (NOTE(review): presumably a path when resuming - confirm)
+  load_optimizer: false
+
+cudnn:
+  benchmark: false
+  deterministic: true
diff --git a/egs/nit-song070/svs-world-conv-mdn/conf/train/timelag/data/myconfig.yaml b/egs/nit-song070/svs-world-conv-mdn/conf/train/timelag/data/myconfig.yaml
@@ -0,0 +1,16 @@
+# @package _group_
+# Dataset locations and data-loader settings for time-lag model training.
+# training set (NOTE(review): directories are null here; presumably overridden per run - confirm)
+train_no_dev:
+  in_dir: null
+  out_dir: null
+
+# development set
+dev:
+  in_dir: null
+  out_dir: null
+
+# data loader
+num_workers: 2
+batch_size: 2
+pin_memory: true
diff --git a/egs/nit-song070/svs-world-conv-mdn/conf/train/timelag/model/timelag_mdn.yaml b/egs/nit-song070/svs-world-conv-mdn/conf/train/timelag/model/timelag_mdn.yaml
@@ -0,0 +1,14 @@
+# @package _group_
+# Time-lag model definition: one output stream of size 1, network class nnsvs.model.MDN.
+stream_sizes: [1]
+has_dynamic_features: [false]
+stream_weights: [1]
+
+netG:
+  _target_: nnsvs.model.MDN
+  in_dim: 295
+  out_dim: 1  # matches the single entry of stream_sizes
+  hidden_dim: 1024
+  num_layers: 4
+  dropout: 0.5
+  num_gaussians: 4
diff --git a/egs/nit-song070/svs-world-conv-mdn/conf/train/timelag/train/myconfig.yaml b/egs/nit-song070/svs-world-conv-mdn/conf/train/timelag/train/myconfig.yaml
@@ -0,0 +1,29 @@
+# @package _group_
+# Training settings for the time-lag model: schedule, optimizer, resume and cuDNN flags.
+out_dir: exp
+nepochs: 50
+checkpoint_epoch_interval: 20
+
+stream_wise_loss: false
+use_detect_anomaly: true
+
+optim:
+  optimizer:
+    name: Adam
+    params:
+      lr: 0.001
+      betas: [0.9, 0.999]
+      weight_decay: 0.0
+  lr_scheduler:
+    name: StepLR
+    params:
+      step_size: 20
+      gamma: 0.5
+
+resume:
+  checkpoint: null  # no checkpoint selected (NOTE(review): presumably a path when resuming - confirm)
+  load_optimizer: false
+
+cudnn:
+  benchmark: false
+  deterministic: true
diff --git a/egs/nit-song070/svs-world-conv-mdn/config.yaml b/egs/nit-song070/svs-world-conv-mdn/config.yaml
@@ -0,0 +1,65 @@
+# General settings. run.sh evals the output of parse_yaml on this file and uses keys such as spk and tag as shell variables.
+spk: "yoko"
+
+# Experiment tag (for managing experiments): run.sh uses exp/<spk>_<tag>, or exp/<spk> when this is left empty
+tag:
+
+###########################################################
+#                DATA PREPARATION SETTING                 #
+###########################################################
+
+# Directory of the unzipped singing voice database
+# PLEASE CHANGE THE PATH BASED ON YOUR ENVIRONMENT (the default is where run.sh stage -1 extracts the download)
+db_root: "downloads/HTS-demo_NIT-SONG070-F001"
+
+# Output directory
+out_dir: "./data"
+
+###########################################################
+#                FEATURE EXTRACTION SETTING               #
+###########################################################
+
+# HTS-style question file used for extracting musical/linguistic context from musicxml files
+question_path: "../../_common/hed/jp_qst003_nnsvs.hed"
+
+timelag_features: defaults
+duration_features: defaults
+acoustic_features: static_deltadelta
+
+###########################################################
+#                TRAINING SETTING                         #
+###########################################################
+
+# Models
+# To customize, add your own config (or edit an existing one) under
+# conf/train/{timelag,duration,acoustic}/ and
+# specify the config name below.
+# NOTE: *_model: model definition, *_train: general training configs,
+# *_data: data configs (e.g., batch size)
+
+timelag_model: timelag_mdn
+timelag_train: myconfig
+timelag_data: myconfig
+
+duration_model: duration_mdn
+duration_train: myconfig
+duration_data: myconfig
+
+acoustic_model: acoustic_mdn
+acoustic_train: myconfig
+acoustic_data: myconfig
+
+# Pretrained model dir (leave empty to disable)
+pretrained_expdir:
+
+###########################################################
+#                SYNTHESIS SETTING                        #
+###########################################################
+timelag_synthesis: defaults
+duration_synthesis: defaults
+acoustic_synthesis: defaults
+
+# Checkpoint file name for each model: latest.pth or best.pth
+timelag_eval_checkpoint: latest.pth
+duration_eval_checkpoint: latest.pth
+acoustic_eval_checkpoint: latest.pth
diff --git a/egs/nit-song070/svs-world-conv-mdn/local b/egs/nit-song070/svs-world-conv-mdn/local
@@ -0,0 +1 @@
+../svs-world-conv
diff --git a/egs/nit-song070/svs-world-conv-mdn/run.sh b/egs/nit-song070/svs-world-conv-mdn/run.sh
@@ -0,0 +1,100 @@
+#!/bin/bash
+
+# Set bash to 'debug' mode, it will exit on :
+# -e 'error', -u 'undefined variable', -o ... 'error in pipeline', -x 'print commands',
+set -e
+set -u
+set -o pipefail
+
+function xrun () {  # run the given command line with shell tracing (set -x) switched on
+    set -x
+    "$@"  # quoted: each argument is passed through unchanged, without re-splitting or globbing
+    set +x
+}
+
+script_dir=$(cd "$(dirname "${BASH_SOURCE:-$0}")" && pwd)  # absolute directory of this script; fails if cd fails
+NNSVS_ROOT=$script_dir/../../../                            # three levels above this recipe directory
+NNSVS_COMMON_ROOT=$NNSVS_ROOT/egs/_common/spsvs
+. "$NNSVS_ROOT/utils/yaml_parser.sh" || exit 1  # NOTE(review): expected to define parse_yaml - confirm
+
+eval "$(parse_yaml "./config.yaml" "")"  # config.yaml is read relative to the current directory
+
+train_set="train_no_dev"
+dev_set="dev"
+eval_set="eval"
+datasets=($train_set $dev_set $eval_set)
+testsets=($dev_set $eval_set)
+
+dumpdir=dump
+
+dump_org_dir=$dumpdir/$spk/org
+dump_norm_dir=$dumpdir/$spk/norm
+
+stage=0
+stop_stage=0
+
+. $NNSVS_ROOT/utils/parse_options.sh || exit 1;
+
+# exp name: <spk>, or <spk>_<tag> when a tag is given
+if [ -z "${tag:=}" ]; then  # := gives tag an empty default, so the expansion is safe under set -u
+    expname=${spk}
+else
+    expname=${spk}_${tag}
+fi
+expdir=exp/$expname
+
+if [ "${stage}" -le -1 ] && [ "${stop_stage}" -ge -1 ]; then
+    if [ ! -e downloads/HTS-demo_NIT-SONG070-F001 ]; then  # skip when the directory already exists
+        echo "stage -1: Downloading data"
+        mkdir -p downloads
+        cd downloads
+        curl -fLO http://hts.sp.nitech.ac.jp/archives/2.3/HTS-demo_NIT-SONG070-F001.tar.bz2  # -f: fail on HTTP errors
+        tar jxvf HTS-demo_NIT-SONG070-F001.tar.bz2
+        cd "$script_dir"  # return to the recipe directory for the following stages
+    fi
+fi
+
+if [ "${stage}" -le 0 ] && [ "${stop_stage}" -ge 0 ]; then
+    echo "stage 0: Data preparation"
+    # the following three directories will be created
+    # 1) data/timelag 2) data/duration 3) data/acoustic
+    python local/data_prep.py "$db_root" ./data --gain-normalize  # db_root is the database path key in config.yaml
+
+    echo "train/dev/eval split"
+    mkdir -p data/list
+    find data/acoustic/ -type f -name "*.wav" -exec basename {} .wav \; \
+        | sort > data/list/utt_list.txt
+    grep _003 data/list/utt_list.txt > "data/list/$eval_set.list"  # names containing _003 -> eval set
+    grep _004 data/list/utt_list.txt > "data/list/$dev_set.list"  # names containing _004 -> dev set
+    grep -v _003 data/list/utt_list.txt | grep -v _004 > "data/list/$train_set.list"  # everything else -> train
+fi
+
+if [ "${stage}" -le 1 ] && [ "${stop_stage}" -ge 1 ]; then
+    echo "stage 1: Feature generation"
+    . "$NNSVS_COMMON_ROOT/feature_generation.sh"  # sourced, so it runs in this shell with this script's variables
+fi
+
+if [ "${stage}" -le 2 ] && [ "${stop_stage}" -ge 2 ]; then
+    echo "stage 2: Training time-lag model"
+    . "$NNSVS_COMMON_ROOT/train_timelag.sh"
+fi
+
+if [ "${stage}" -le 3 ] && [ "${stop_stage}" -ge 3 ]; then
+    echo "stage 3: Training duration model"
+    . "$NNSVS_COMMON_ROOT/train_duration.sh"
+fi
+
+if [ "${stage}" -le 4 ] && [ "${stop_stage}" -ge 4 ]; then
+    echo "stage 4: Training acoustic model"
+    . "$NNSVS_COMMON_ROOT/train_acoustic.sh"
+fi
+
+if [ "${stage}" -le 5 ] && [ "${stop_stage}" -ge 5 ]; then
+    echo "stage 5: Generate features from timelag/duration/acoustic models"
+    . "$NNSVS_COMMON_ROOT/generate.sh"
+fi
+
+if [ "${stage}" -le 6 ] && [ "${stop_stage}" -ge 6 ]; then
+    echo "stage 6: Synthesis waveforms"
+    . "$NNSVS_COMMON_ROOT/synthesis.sh"
+fi
diff --git a/nnsvs/bin/conf/train/config.yaml b/nnsvs/bin/conf/train/config.yaml
@@ -8,3 +8,4 @@ defaults:
   - data: defaults
 
 verbose: 100
+seed: 773  # NOTE(review): presumably the random seed used for training - confirm against the consumer