In [1]:
## https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=650308

from scipy.io import wavfile
from scipy.fft import fft, fftfreq, ifft
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Read the recording: wavfile.read returns the sample rate and the sample array.
rate, data = wavfile.read("audio/vowel.wav")
# Widen to int32 so later sample arithmetic is not done in the file's narrow
# native dtype (the original note says this file loads as uint8).
data = data.astype(np.int32)

In [3]:
# Signal pre-emphasis: diffs[n] = data[n+1] - 0.97 * data[n].
# This is a weighted first-order difference, computed on whole-array slices
# instead of a per-sample Python loop. The result is a float array one sample
# shorter than `data`.
PRE_EMPHASIS_COEFF = 0.97

diffs = data[1:] - PRE_EMPHASIS_COEFF * data[:-1]

In [4]:
## cut the pre-emphasised signal into windows of size 25 ms, one every 10 ms
window_len = 0.025 # s
window_interval = 0.010 # s
window_len_samples = int(window_len * rate)
window_interval_samples = int(window_interval * rate)
N = len(data)  # not needed for the framing below; retained so any other cell that reads N still works

# Frames are sliced from `diffs`, so the "does it fit" bound is expressed in
# terms of len(diffs) and the integer frame length. A start index is kept only
# if the whole frame diffs[start:start + window_len_samples] exists, which
# guarantees every entry of `windows` has exactly window_len_samples samples.
last_start = len(diffs) - window_len_samples
windows = [
    np.array(diffs[start:start + window_len_samples])  # np.array(...) copies the slice
    for start in range(0, last_start + 1, window_interval_samples)
]

In [5]:
## Log-magnitude spectrum of a single analysis frame, saved to disk so we can
## estimate what the formants should look like.
## Steps: Hamming-window the frame, 0-pad to FFT_SIZE, FFT, plot the positive-frequency half.
## NOTE(review): the original note says to focus on the 35th window (deep enough
## into the recording to be a vowel), but the code selects windows[0] -- confirm
## which frame is intended before trusting the saved plot.

FFT_SIZE = 512
frame = windows[0]

if len(frame) > FFT_SIZE:
    # Zero-padding can only lengthen the frame; fail with a clear message rather
    # than NumPy's "negative dimensions" error from np.zeros.
    raise ValueError(
        f"frame has {len(frame)} samples, which exceeds FFT_SIZE={FFT_SIZE}"
    )

# Taper the frame itself and only then zero-pad. Windowing the padded buffer
# would leave the data cut off abruptly where the padding starts.
w = np.concatenate((frame * np.hamming(len(frame)), np.zeros(FFT_SIZE - len(frame))))

# Transform the prepared frame (not the whole recording) so the FFT_SIZE-point
# frequency axis below matches the spectrum bins.
s = fft(w)

freq_fft = fftfreq(FFT_SIZE, 1/rate)[:FFT_SIZE//2]  # non-negative frequencies, in Hz
plt.plot(freq_fft, np.log(np.abs(s[:FFT_SIZE//2])))
plt.xlabel("frequency (Hz)")
plt.ylabel("log |FFT|")
plt.savefig("formant/vowel_fft_hamming.png")
plt.close()