forked from NVIDIA/NeMo
-
Notifications
You must be signed in to change notification settings - Fork 0
/
quartznet_test.yaml
110 lines (95 loc) · 2.33 KB
/
quartznet_test.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# Top-level scalars for this configuration.
model: "QuartzNet"
# NOTE(review): presumably the audio sampling rate in Hz — confirm with the consumer.
sample_rate: 16000
# Anchored as `drop`; every entry of the `jasper` list under `JasperEncoder`
# reuses this value through the alias `*drop`, so changing it here changes all
# of them at once.
dropout: &drop 0.2
# Settings for the `AudioToTextDataLayer` section. Keys directly under the
# section sit next to two sub-sections, `train` and `eval`, which carry
# per-split values.
# NOTE(review): presumably the section-level keys act as defaults that the
# sub-sections override — confirm how the consumer merges them.
AudioToTextDataLayer:
  max_duration: 16.7  # NOTE(review): presumably seconds — confirm
  trim_silence: true

  train:
    shuffle: true

  eval:
    shuffle: false
    # Explicit null here, versus 16.7 at section level.
    # NOTE(review): presumably disables the duration cap for evaluation.
    max_duration: null
# Settings for the `AudioToMelSpectrogramPreprocessor` section; all keys below
# are direct children of the section.
AudioToMelSpectrogramPreprocessor:
  # NOTE(review): window_size / window_stride are presumably in seconds
  # (0.02 and 0.01) — confirm with the consumer.
  window_size: 0.02
  window_stride: 0.01
  window: "hann"
  normalize: "per_feature"
  n_fft: 512
  features: 64
  feat_type: "logfbank"
  # Written in plain decimal notation rather than exponent form, which some
  # YAML 1.1 loaders read as a string when the mantissa has no decimal point.
  dither: 0.00001
  pad_to: 16
  stft_conv: true
# Settings for the `SpectrogramAugmentation` section; the three `rect_*` keys
# are direct children of the section.
# NOTE(review): presumably the count and maximum time/frequency extent of
# rectangular masks — confirm units with the consumer.
SpectrogramAugmentation:
  rect_masks: 5
  rect_time: 120
  rect_freq: 50
# Settings for the `JasperEncoder` section: two scalar options plus `jasper`,
# an ordered list of eight block descriptions. Every block takes its `dropout`
# from the `drop` anchor defined on the top-level `dropout` key.
JasperEncoder:
  activation: "relu"
  conv_mask: true

  jasper:
    # Entry 1: the only entry with stride 2.
    - filters: 256
      repeat: 1
      kernel: [33]
      stride: [2]
      dilation: [1]
      dropout: *drop
      residual: false
      separable: true

    # Entries 2-3: kernel 63, repeated 3 times, residual.
    - filters: 512
      repeat: 3
      kernel: [63]
      stride: [1]
      dilation: [1]
      dropout: *drop
      residual: true
      separable: true

    - filters: 512
      repeat: 3
      kernel: [63]
      stride: [1]
      dilation: [1]
      dropout: *drop
      residual: true
      separable: true

    # Entries 4-6: kernel 75, repeated 3 times, residual.
    - filters: 512
      repeat: 3
      kernel: [75]
      stride: [1]
      dilation: [1]
      dropout: *drop
      residual: true
      separable: true

    - filters: 512
      repeat: 3
      kernel: [75]
      stride: [1]
      dilation: [1]
      dropout: *drop
      residual: true
      separable: true

    - filters: 512
      repeat: 3
      kernel: [75]
      stride: [1]
      dilation: [1]
      dropout: *drop
      residual: true
      separable: true

    # Entry 7: the only entry with dilation 2; not residual.
    - filters: 512
      repeat: 1
      kernel: [87]
      stride: [1]
      dilation: [2]
      dropout: *drop
      residual: false
      separable: true

    # Entry 8: kernel 1, 1024 filters.
    # NOTE(review): unlike every entry above, this one has no `separable` key,
    # so the consumer's default applies — confirm that is intended.
    - filters: 1024
      repeat: 1
      kernel: [1]
      stride: [1]
      dilation: [1]
      dropout: *drop
      residual: false
# Symbol list with 28 entries: a space, the letters a-z, and an apostrophe.
# NOTE(review): the order presumably determines each symbol's index for the
# consumer — confirm before reordering.
labels: [
  " ",
  "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
  "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",
  "'"
]