Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dim-wise MDN: attempt to improve MDN-based models #44

Merged
merged 10 commits into from
Nov 26, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# @package _group_
# Data configuration: dataset directories and data-loader options.
# Nesting restored: in_dir/out_dir belong to each dataset entry; left flat
# they would be duplicate top-level keys.

# training set
# Directories are intentionally unset (null) in this file.
train_no_dev:
  in_dir: null
  out_dir: null

# development set
dev:
  in_dir: null
  out_dir: null

# data loader
num_workers: 2
batch_size: 2
pin_memory: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# @package _group_
# (mgc, lf0, vuv, bap)
stream_sizes: [180, 3, 1, 15]
has_dynamic_features: [true, true, false, true]
num_windows: 3
# If None, automatically set based on stream sizes
stream_weights: null

# Model definition; every key below _target_ is nested under netG.
netG:
  _target_: nnsvs.model.Conv1dResnetMDN
  in_dim: 299
  # 199 = 180 + 3 + 1 + 15, the sum of stream_sizes above.
  out_dim: 199
  hidden_dim: 128
  num_layers: 6
  dropout: 0.1
  num_gaussians: 4
  dim_wise: false
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# @package _group_
# (mgc, lf0, vuv, bap)
stream_sizes: [180, 3, 1, 15]
has_dynamic_features: [true, true, false, true]
num_windows: 3
# If None, automatically set based on stream sizes
stream_weights: null

# Model definition; every key below _target_ is nested under netG.
netG:
  _target_: nnsvs.model.Conv1dResnetMDN
  in_dim: 299
  # 199 = 180 + 3 + 1 + 15, the sum of stream_sizes above.
  out_dim: 199
  hidden_dim: 128
  num_layers: 6
  dropout: 0.1
  num_gaussians: 4
  # Dim-wise variant of the MDN is enabled in this config.
  dim_wise: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# @package _group_
# Training configuration. Nesting restored: optimizer and lr_scheduler each
# carry their own name/params; left flat those keys would be duplicates.

out_dir: exp
nepochs: 200
checkpoint_epoch_interval: 20

stream_wise_loss: false
use_detect_anomaly: true

optim:
  optimizer:
    name: Adam
    params:
      lr: 0.001
      betas: [0.9, 0.999]
      weight_decay: 0.0
  lr_scheduler:
    name: StepLR
    params:
      step_size: 50
      gamma: 0.5

resume:
  # No checkpoint is configured by default.
  checkpoint: null
  load_optimizer: false

cudnn:
  benchmark: false
  deterministic: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# @package _group_
# Data configuration: dataset directories and data-loader options.
# Nesting restored: in_dir/out_dir belong to each dataset entry; left flat
# they would be duplicate top-level keys.

# training set
# Directories are intentionally unset (null) in this file.
train_no_dev:
  in_dir: null
  out_dir: null

# development set
dev:
  in_dir: null
  out_dir: null

# data loader
num_workers: 2
batch_size: 2
pin_memory: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# @package _group_
# Single-stream model configuration (one output dimension, no dynamic features).

stream_sizes: [1]
has_dynamic_features: [false]
stream_weights: [1]

# Model definition; every key below _target_ is nested under netG.
netG:
  _target_: nnsvs.model.MDN
  in_dim: 295
  # Matches the single entry of stream_sizes above.
  out_dim: 1
  hidden_dim: 1024
  num_layers: 4
  dropout: 0.5
  num_gaussians: 4
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# @package _group_
# Training configuration. Nesting restored: optimizer and lr_scheduler each
# carry their own name/params; left flat those keys would be duplicates.

out_dir: exp
nepochs: 50
checkpoint_epoch_interval: 20

stream_wise_loss: false
use_detect_anomaly: true

optim:
  optimizer:
    name: Adam
    params:
      lr: 0.001
      betas: [0.9, 0.999]
      weight_decay: 0.0
  lr_scheduler:
    name: StepLR
    params:
      step_size: 20
      gamma: 0.5

resume:
  # No checkpoint is configured by default.
  checkpoint: null
  load_optimizer: false

cudnn:
  benchmark: false
  deterministic: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# @package _group_
# Data configuration: dataset directories and data-loader options.
# Nesting restored: in_dir/out_dir belong to each dataset entry; left flat
# they would be duplicate top-level keys.

# training set
# Directories are intentionally unset (null) in this file.
train_no_dev:
  in_dir: null
  out_dir: null

# development set
dev:
  in_dir: null
  out_dir: null

# data loader
num_workers: 2
batch_size: 2
pin_memory: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# @package _group_
# Single-stream model configuration (one output dimension, no dynamic features).

stream_sizes: [1]
has_dynamic_features: [false]
stream_weights: [1]

# Model definition; every key below _target_ is nested under netG.
netG:
  _target_: nnsvs.model.MDN
  in_dim: 295
  # Matches the single entry of stream_sizes above.
  out_dim: 1
  hidden_dim: 1024
  num_layers: 4
  dropout: 0.5
  num_gaussians: 4
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# @package _group_
# Training configuration. Nesting restored: optimizer and lr_scheduler each
# carry their own name/params; left flat those keys would be duplicates.

out_dir: exp
nepochs: 50
checkpoint_epoch_interval: 20

stream_wise_loss: false
use_detect_anomaly: true

optim:
  optimizer:
    name: Adam
    params:
      lr: 0.001
      betas: [0.9, 0.999]
      weight_decay: 0.0
  lr_scheduler:
    name: StepLR
    params:
      step_size: 20
      gamma: 0.5

resume:
  # No checkpoint is configured by default.
  checkpoint: null
  load_optimizer: false

cudnn:
  benchmark: false
  deterministic: true
65 changes: 65 additions & 0 deletions egs/nit-song070/svs-world-conv-mdn/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# General settings.
# NOTE(review): run.sh reads this file through the shell function parse_yaml,
# not a YAML library; whether inline comments or explicit `null` are tolerated
# is unverified, so keep comments on their own lines and leave empty values bare.
# Speaker name; run.sh uses it in the dump and experiment directory names.
spk: "yoko"

# Experiment tag (for managing experiments).
# When empty, run.sh names the experiment "<spk>"; otherwise "<spk>_<tag>".
tag:

###########################################################
# DATA PREPARATION SETTING #
###########################################################

# Directory of the unzipped singing voice database.
# PLEASE CHANGE THE PATH BASED ON YOUR ENVIRONMENT
db_root: "downloads/HTS-demo_NIT-SONG070-F001"

# Output directory
out_dir: "./data"

###########################################################
# FEATURE EXTRACTION SETTING #
###########################################################

# HTS-style question file used for extracting musical/linguistic context
# from musicxml files. The path is relative.
question_path: "../../_common/hed/jp_qst003_nnsvs.hed"

# Feature-extraction settings, one per model (time-lag, duration, acoustic).
# NOTE(review): the values look like config names resolved elsewhere — confirm.
timelag_features: defaults
duration_features: defaults
acoustic_features: static_deltadelta

###########################################################
# TRAINING SETTING #
###########################################################

# Models
# To customize, add a config (or edit an existing one) under
# conf/train/{timelag,duration,acoustic}/ and
# specify the config name below.
# NOTE: *_model: model definition, *_train: general train configs,
# *_data: data configs (e.g., batch size)

# Time-lag model
timelag_model: timelag_mdn
timelag_train: myconfig
timelag_data: myconfig

# Duration model
duration_model: duration_mdn
duration_train: myconfig
duration_data: myconfig

# Acoustic model
acoustic_model: acoustic_mdn
acoustic_train: myconfig
acoustic_data: myconfig

# Pretrained model dir (leave empty to disable)
pretrained_expdir:

###########################################################
# SYNTHESIS SETTING #
###########################################################
# Synthesis settings, one per model (time-lag, duration, acoustic).
timelag_synthesis: defaults
duration_synthesis: defaults
acoustic_synthesis: defaults

# Checkpoint file used for each model at evaluation time:
# latest.pth or best.pth
timelag_eval_checkpoint: latest.pth
duration_eval_checkpoint: latest.pth
acoustic_eval_checkpoint: latest.pth
1 change: 1 addition & 0 deletions egs/nit-song070/svs-world-conv-mdn/local
100 changes: 100 additions & 0 deletions egs/nit-song070/svs-world-conv-mdn/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
#!/bin/bash

# Strict mode; the script exits on:
# -e 'error', -u 'undefined variable', -o pipefail 'error in pipeline'.
# Command tracing (-x) is NOT enabled globally; xrun enables it per command.
set -e
set -u
set -o pipefail

# Run the given command with tracing (set -x) switched on around it.
# Arguments:
#   $@ - the command and its arguments.
# "$@" is quoted so arguments containing whitespace or glob characters are
# passed through unchanged instead of being re-split and expanded.
function xrun () {
    set -x
    "$@"
    set +x
}

# Absolute path of the directory containing this script.
script_dir=$(cd "$(dirname "${BASH_SOURCE:-$0}")"; pwd)
NNSVS_ROOT=$script_dir/../../../
NNSVS_COMMON_ROOT=$NNSVS_ROOT/egs/_common/spsvs
. "$NNSVS_ROOT/utils/yaml_parser.sh" || exit 1;

# Evaluate the output of parse_yaml for ./config.yaml in this shell.
# Presumably this defines one shell variable per config key (e.g. $spk and
# $tag used below) — confirm against utils/yaml_parser.sh.
eval $(parse_yaml "./config.yaml" "")

# Dataset split names.
train_set="train_no_dev"
dev_set="dev"
eval_set="eval"
datasets=($train_set $dev_set $eval_set)
testsets=($dev_set $eval_set)

dumpdir=dump

dump_org_dir=$dumpdir/$spk/org
dump_norm_dir=$dumpdir/$spk/norm

# Default stage range; set before parse_options.sh is sourced.
stage=0
stop_stage=0

. "$NNSVS_ROOT/utils/parse_options.sh" || exit 1;

# exp name
# ${tag:=} assigns an empty string when tag is unset (required under set -u).
# The expansion is quoted so the test stays well-formed whether tag is empty
# or contains whitespace.
if [ -z "${tag:=}" ]; then
    expname=${spk}
else
    expname=${spk}_${tag}
fi
expdir=exp/$expname

# Stage -1: download and unpack the database, unless it is already present.
if [ "${stage}" -le -1 ] && [ "${stop_stage}" -ge -1 ]; then
    if [ ! -e downloads/HTS-demo_NIT-SONG070-F001 ]; then
        echo "stage -1: Downloading data"
        mkdir -p downloads
        cd downloads
        curl -LO http://hts.sp.nitech.ac.jp/archives/2.3/HTS-demo_NIT-SONG070-F001.tar.bz2
        tar jxvf HTS-demo_NIT-SONG070-F001.tar.bz2
        # Return to the script directory; quoted so a path containing
        # whitespace does not break the cd.
        cd "$script_dir"
    fi
fi

# Stage 0: data preparation and train/dev/eval split.
if [ "${stage}" -le 0 ] && [ "${stop_stage}" -ge 0 ]; then
    echo "stage 0: Data preparation"
    # the following three directories will be created
    # 1) data/timelag 2) data/duration 3) data/acoustic
    # The database location comes from db_root in config.yaml. The previously
    # referenced $hts_demo_root is not defined anywhere in this script or in
    # config.yaml, so under `set -u` this stage aborted.
    python local/data_prep.py "$db_root" ./data --gain-normalize

    echo "train/dev/eval split"
    mkdir -p data/list
    # Utterance list: sorted basenames of all wav files under data/acoustic/.
    find data/acoustic/ -type f -name "*.wav" -exec basename {} .wav \; \
        | sort > data/list/utt_list.txt
    # Names containing _003 form the eval set, _004 the dev set,
    # and everything else the training set.
    grep _003 data/list/utt_list.txt > data/list/$eval_set.list
    grep _004 data/list/utt_list.txt > data/list/$dev_set.list
    grep -v _003 data/list/utt_list.txt | grep -v _004 > data/list/$train_set.list
fi

# Stages 1-6 source the shared recipe scripts from $NNSVS_COMMON_ROOT into the
# current shell, so they see every variable defined above. Paths are quoted so
# a checkout location containing whitespace does not break the sourcing.
if [ "${stage}" -le 1 ] && [ "${stop_stage}" -ge 1 ]; then
    echo "stage 1: Feature generation"
    . "$NNSVS_COMMON_ROOT/feature_generation.sh"
fi

if [ "${stage}" -le 2 ] && [ "${stop_stage}" -ge 2 ]; then
    echo "stage 2: Training time-lag model"
    . "$NNSVS_COMMON_ROOT/train_timelag.sh"
fi

if [ "${stage}" -le 3 ] && [ "${stop_stage}" -ge 3 ]; then
    echo "stage 3: Training duration model"
    . "$NNSVS_COMMON_ROOT/train_duration.sh"
fi

if [ "${stage}" -le 4 ] && [ "${stop_stage}" -ge 4 ]; then
    echo "stage 4: Training acoustic model"
    . "$NNSVS_COMMON_ROOT/train_acoustic.sh"
fi

if [ "${stage}" -le 5 ] && [ "${stop_stage}" -ge 5 ]; then
    echo "stage 5: Generate features from timelag/duration/acoustic models"
    . "$NNSVS_COMMON_ROOT/generate.sh"
fi

if [ "${stage}" -le 6 ] && [ "${stop_stage}" -ge 6 ]; then
    echo "stage 6: Synthesis waveforms"
    . "$NNSVS_COMMON_ROOT/synthesis.sh"
fi
1 change: 1 addition & 0 deletions nnsvs/bin/conf/train/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ defaults:
- data: defaults

verbose: 100
seed: 773
Loading