forked from r9y9/gossp
/
mglsadf_synthesizer.go
94 lines (80 loc) · 2.88 KB
/
mglsadf_synthesizer.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
package vocoder
import (
"math"
)
// MGLSASpeechSynthesizer represents a speech synthesizer based on the
// MGLSA Filter. It bundles the parameters used to convert mel-cepstra into
// MGLSA filter coefficients with the filter instance that produces the
// output samples.
type MGLSASpeechSynthesizer struct {
// FrameShift is the number of excitation samples that Synthesis assigns to
// each mel-cepstrum frame.
FrameShift int
Alpha float64 // all-pass constant
// Gamma is set to -1/NumStage by NewMGLSASpeechSynthesizer.
Gamma float64 // parameter of generalized logarithmic function
// NumStage is the stage count handed to the underlying MGLSA filter by
// NewMGLSASpeechSynthesizer.
NumStage int
// coreFilter is invoked once per excitation sample by SynthesisOneFrame.
coreFilter *MGLSAFilter // used in sample by sample waveform generation
}
// NewMGLSASpeechSynthesizer creates a synthesizer whose core MGLSA filter is
// built from order, alpha and numStage, and which assigns frameShift
// excitation samples to every mel-cepstrum frame. Gamma is derived from
// numStage as -1/numStage.
func NewMGLSASpeechSynthesizer(order int, alpha float64, numStage int,
	frameShift int) *MGLSASpeechSynthesizer {
	// Gamma is not a free parameter here: it is tied to the stage count.
	gamma := -1.0 / float64(numStage)

	return &MGLSASpeechSynthesizer{
		FrameShift: frameShift,
		Alpha:      alpha,
		Gamma:      gamma,
		NumStage:   numStage,
		coreFilter: NewMGLSAFilter(order, alpha, numStage),
	}
}
// Synthesis synthesizes a speech signal from an excitation signal and the
// corresponding mel-cepstrum sequence.
//
// Frame i of mgcepSequence drives the excitation samples in the half-open
// range [i*FrameShift, (i+1)*FrameShift). Each frame is synthesized by
// SynthesisOneFrame from the previous and the current mel-cepstrum; the first
// frame has no predecessor and is paired with itself.
//
// The returned slice always has len(excite) elements. Processing stops at the
// first frame that would run past the end of excite, and samples that are not
// covered by a synthesized frame stay zero. An empty mgcepSequence therefore
// yields an all-zero signal.
func (s *MGLSASpeechSynthesizer) Synthesis(excite []float64,
	mgcepSequence [][]float64) []float64 {
	// Synthesized speech signal will be stored here.
	synthesizedSpeech := make([]float64, len(excite))

	for i, currentMgcep := range mgcepSequence {
		startIndex, endIndex := i*s.FrameShift, (i+1)*s.FrameShift
		// A frame that ends exactly at len(excite) is still complete, so stop
		// only when the frame would extend beyond the excitation.
		if endIndex > len(excite) {
			break
		}

		// The first frame is interpolated with itself.
		previousMgcep := currentMgcep
		if i > 0 {
			previousMgcep = mgcepSequence[i-1]
		}

		// Synthesize a part of speech and place it at the frame's position.
		partOfSpeech := s.SynthesisOneFrame(excite[startIndex:endIndex],
			previousMgcep, currentMgcep)
		copy(synthesizedSpeech[startIndex:], partOfSpeech)
	}

	return synthesizedSpeech
}
// SynthesisOneFrame turns one frame of excitation into speech samples using
// two successive mel-cepstra.
//
// Both mel-cepstra are converted to MGLSA filter coefficients with the
// synthesizer's Alpha and Gamma. The coefficients applied to the first sample
// are those derived from previousMgcep; after every sample they advance by a
// constant step chosen so that they move linearly toward the coefficients
// derived from currentMgcep over the length of the frame. The returned slice
// has the same length as excite.
func (s *MGLSASpeechSynthesizer) SynthesisOneFrame(excite []float64,
	previousMgcep, currentMgcep []float64) []float64 {
	// Mel-cepstrum -> MGLSA filter coefficients for both ends of the frame.
	target := MGCep2MGLSAFilterCoef(currentMgcep, s.Alpha, s.Gamma)
	start := MGCep2MGLSAFilterCoef(previousMgcep, s.Alpha, s.Gamma)

	// Per-sample increment of each coefficient.
	numSamples := float64(len(excite))
	step := make([]float64, len(currentMgcep))
	for k := range step {
		step[k] = (target[k] - start[k]) / numSamples
	}

	// Interpolate on a private copy so the converted coefficients stay intact.
	coef := append([]float64(nil), start...)

	out := make([]float64, len(excite))
	for n, x := range excite {
		// Scale the excitation by the exponential of the 0th (power)
		// coefficient before filtering.
		scaled := x * math.Exp(coef[0])
		out[n] = s.coreFilter.Filter(scaled, coef)

		// Advance the coefficients for the next sample.
		for k, d := range step {
			coef[k] += d
		}
	}

	return out
}