GOSSP is a Speech Signal Processing library for the Go language, which includes time-frequency analysis, fundamental frequency estimation, spectral envelope estimation and waveform generation filters, etc.
- cwt - Continuous Wavelet Transform (CWT) and inverse CWT (in development).
- dct - Discrete Cosine Transform (DCT) and Inverse DCT.
- dtw - Dynamic Time Warping (DTW)
- excite - Excitation generation from fundamental frequency.
- f0 - Fundamental frequency (f0) estimation.
- io - Input/Output (in development).
- mgcep - Mel-generalized cepstrum analysis for spectral envelope estimation.
- special - Special functions analogous to those provided by SciPy in Python.
- stft - Short-Time Fourier Transform (STFT) and Inverse STFT.
- vocoder - Speech waveform generation filters.
- window - Window functions.
- z - Z-transform to analyze digital filters.
go get github.com/r9y9/gossp
To use SPTK with GOSSP, you need to install the modified version of SPTK as follows:
git clone https://github.com/r9y9/SPTK.git && cd SPTK
./waf configure && ./waf
sudo ./waf install
package main
import (
"flag"
"fmt"
"github.com/r9y9/gossp"
"github.com/r9y9/gossp/io"
"github.com/r9y9/gossp/stft"
"github.com/r9y9/gossp/window"
"log"
"math"
)
// main reads the WAV file named by the -i flag (default "input.wav"), runs a
// short-time Fourier transform over its mono samples, and prints the first
// matrix returned by gossp.SplitSpectrogram in gnuplot format.
func main() {
	input := flag.String("i", "input.wav", "Input filename")
	flag.Parse()

	wav, err := io.ReadWav(*input)
	if err != nil {
		log.Fatal(err)
	}
	samples := wav.GetMonoData()

	// The analysis window and the STFT frame share the same length.
	const frameLen = 2048

	analyzer := &stft.STFT{
		FrameShift: int(float64(wav.SampleRate) / 100.0), // 0.01 sec
		FrameLen:   frameLen,
		Window:     window.CreateHanning(frameLen),
	}

	// Only the first value returned by SplitSpectrogram is plotted; the
	// second one is intentionally discarded.
	spec, _ := gossp.SplitSpectrogram(analyzer.STFT(samples))
	PrintMatrixAsGnuplotFormat(spec)
}
// PrintMatrixAsGnuplotFormat writes matrix to standard output as text that
// gnuplot can read as gridded data.
//
// The first line is a "#" comment holding len(matrix[0]) and len(matrix)/2.
// Each following line has the form "row column value", where value is the
// natural logarithm of the matrix element. A blank line is printed after
// every row.
//
// At most the first 1024 columns of each row are printed; rows shorter than
// that are printed in full. An empty matrix produces no output.
func PrintMatrixAsGnuplotFormat(matrix [][]float64) {
	// Nothing to plot; this also avoids indexing matrix[0] on an empty matrix.
	if len(matrix) == 0 {
		return
	}

	// Upper bound on the number of leading columns printed per row.
	const maxCols = 1024

	fmt.Println("#", len(matrix[0]), len(matrix)/2)
	for i, vec := range matrix {
		// Clamp to the row length so short rows do not cause a slice
		// bounds panic.
		n := len(vec)
		if n > maxCols {
			n = maxCols
		}
		for j, val := range vec[:n] {
			fmt.Println(i, j, math.Log(val))
		}
		fmt.Println("")
	}
}
package main
import (
"flag"
"fmt"
"github.com/r9y9/gossp/io"
"github.com/r9y9/gossp/stft"
"github.com/r9y9/gossp/window"
"log"
)
// main reads the WAV file named by the -i flag (default "input.wav"), runs a
// short-time Fourier transform followed by its inverse over the mono samples,
// and prints each original sample next to the reconstructed one.
func main() {
	input := flag.String("i", "input.wav", "Input filename")
	flag.Parse()

	wav, err := io.ReadWav(*input)
	if err != nil {
		log.Fatal(err)
	}
	samples := wav.GetMonoData()

	// The analysis window and the STFT frame share the same length.
	const frameLen = 2048

	transform := &stft.STFT{
		FrameShift: int(float64(wav.SampleRate) / 100.0), // 0.01 sec
		FrameLen:   frameLen,
		Window:     window.CreateHanning(frameLen),
	}

	spec := transform.STFT(samples)

	// do something on spectrogram

	restored := transform.ISTFT(spec)

	// Each printed pair is expected to be the same.
	for i, sample := range samples {
		fmt.Println(sample, restored[i])
	}
}
Fun with speech signal processing!