In [None]:
# Activate the project environment located in the parent directory so that the
# packages loaded below are resolved against it.
using Pkg
Pkg.activate("../")

# Filesystem helpers (`isfile` is used further down to check for the audio file).
using Base.Filesystem

# This is to read wav files.
# See: https://github.com/dancasimiro/WAV.jl
using WAV
# Plotting of the waveform and of the feature heatmap.
using Plots

# Revise is loaded before SpeechFeatures — presumably so that edits to the
# package sources are picked up without restarting the session.
using Revise
using SpeechFeatures

Download an example audio file from the TIMIT corpus (the download is skipped if the file is already present).

In [None]:
# Use the `Downloads` standard library (Julia ≥ 1.6) rather than shelling out to
# `wget`, which is not installed on every system.
import Downloads

# Fetch the example utterance only when it is not already in the working directory.
if !isfile("LDC93S1.wav")
    Downloads.download(
        "https://catalog.ldc.upenn.edu/desc/addenda/LDC93S1.wav",
        "LDC93S1.wav",
    )
end

Load the audio file and plot its waveform.

In [None]:
# Read the samples (requested in "double" format) together with the sampling rate.
channels, srate = wavread("LDC93S1.wav", format="double")

# TIMIT data has only one channel (i.e. "mono"), so keep the first column.
x = channels[:, 1]

println("sampling freq: $srate Hz\nduration: $(round(length(x) / srate, digits=2)) s")

# Time stamp of every sample: sample `i` occurs at `(i - 1) / srate` seconds, so the
# axis advances by exactly one sampling period per sample and its last point lies
# one period before the total duration.
t = range(0, step = 1 / srate, length = length(x))
plot(t, x, size=(1000, 300), xlabel="time (seconds)", legend = false)

In [None]:
# Build the feature pipeline and apply it to the waveform `x`.
# NOTE(review): the sampling rate is hard-coded to 16 kHz instead of using the
# `srate` value returned by `wavread` — confirm they agree before reusing this
# cell with other audio.
mfcc = MFCC(srate = 16000) # Standard MFCC features
Δ_ΔΔ = DeltaCoeffs(order = 2) # Add first and second order "derivatives" to the features
mnorm = MeanNorm() # Remove the utterance mean
fea = x |> mfcc |> Δ_ΔΔ |> mnorm

# `fea` is indexed below as a collection of equally sized vectors (presumably one
# per frame — TODO confirm against SpeechFeatures). They are stacked as the columns
# of a matrix with `reduce(hcat, fea)`, which avoids splatting a long collection
# into a varargs call. The time axis spreads the elements of `fea` evenly over the
# duration of the signal; the vertical axis is the feature dimension index.
heatmap(
    range(0, length(x) / srate, length = length(fea)),
    1:length(fea[1]),
    reduce(hcat, fea),
    size = (900, 200),
    xlabel = "time (s)",
    c = :viridis
)