-
Notifications
You must be signed in to change notification settings - Fork 497
/
hparams.py
88 lines (73 loc) · 2.31 KB
/
hparams.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import tensorflow as tf
# NOTE: If you want full control for model architecture. please take a look
# at the code and change whatever you want. Some hyper parameters are hardcoded.
# Default hyperparameters:
hparams = tf.contrib.training.HParams(
    name="wavenet_vocoder",

    # Convenient model builder
    builder="wavenet",

    # Presets known to work well.
    # NOTE: If specified, override hyper parameters with preset
    preset="",
    presets={
    },

    # Audio:
    sample_rate=16000,
    silence_threshold=2,
    num_mels=80,
    fft_size=1024,
    # shift can be specified by either hop_size or frame_shift_ms
    hop_size=256,
    frame_shift_ms=None,
    min_level_db=-100,
    ref_level_db=20,

    # Model:
    layers=16,
    stacks=2,
    residual_channels=256,
    gate_channels=512,  # split into 2 groups internally for gated activation
    skip_out_channels=256,
    dropout=1 - 0.95,  # i.e., keep probability 0.95
    kernel_size=3,
    # If True, apply weight normalization as same as DeepVoice3
    weight_normalization=True,

    # Local conditioning (None to disable)
    cin_channels=80,
    # If True, use transposed convolutions to upsample conditional features,
    # otherwise repeat features to adjust time resolution
    upsample_conditional_features=False,
    # should np.prod(upsample_scales) == hop_size
    upsample_scales=[16, 16],

    # Global conditioning (None to disable)
    # currently limited for speaker embedding
    # this should only be enabled for multi-speaker dataset
    gin_channels=None,  # i.e., speaker embedding dim
    n_speakers=7,  # 7 for CMU ARCTIC

    # Data loader
    pin_memory=True,
    num_workers=2,

    # Loss

    # Training:
    batch_size=1,
    adam_beta1=0.9,
    adam_beta2=0.999,
    adam_eps=1e-8,
    initial_learning_rate=2e-3,
    lr_schedule="noam_learning_rate_decay",
    lr_schedule_kwargs={},
    nepochs=2000,
    weight_decay=0.0,
    clip_thresh=1.0,
    # Samples longer than max_time_sec will be trimmed
    # NOTE(review): original comment read "If None, longer samples ... will be
    # trimmed"; trimming presumably applies when this is NOT None — confirm
    # against the data-loader code.
    # This is needed for those who don't have huge GPU memory...
    max_time_sec=2.0,

    # Save
    checkpoint_interval=5000,
    eval_interval=5000,
    save_optimizer_state=True,

    # Eval:
)
def hparams_debug_string():
    """Return a printable, alphabetically sorted dump of ``hparams``.

    The result starts with a ``Hyperparameters:`` header followed by one
    indented ``name: value`` line per hyperparameter, suitable for logging
    at startup.
    """
    params = hparams.values()
    entries = [' %s: %s' % (key, params[key]) for key in sorted(params)]
    return '\n'.join(['Hyperparameters:'] + entries)