In [14]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import lfilter, freqz
from scipy.io import wavfile
import librosa

In [23]:
import numpy as np
import matplotlib.pyplot as plt
import librosa
import librosa.display

def find_formants(vowel_signal, sample_rate):
    # Get the mel-frequency cepstral coefficients (MFCCs)
    mfcc = librosa.feature.mfcc(y=vowel_signal, sr=sample_rate)
    
    # Transpose the MFCCs to have time on the x-axis
    mfcc = mfcc.T
    
    # Compute the first three formants using the MFCCs
    formants = []
    for i in range(3):
        formant = np.argmax(mfcc[:, i+1])  # Skip the 0th coefficient as it represents the overall loudness
        formants.append(formant)
    
    return formants

def plot_signal_with_formants(vowel_signal, sample_rate, formants):
    """Show a vowel's waveform beside its spectrogram, with formants overlaid.

    Parameters:
        vowel_signal: 1-D audio samples, as accepted by librosa.
        sample_rate: Sampling rate of ``vowel_signal`` in Hz.
        formants: Iterable of formant frequencies in Hz. Each finite value is
            drawn as a red horizontal line on the spectrogram's frequency
            axis; non-finite values (e.g. ``nan``) are skipped.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    fig, (wave_ax, spec_ax) = plt.subplots(1, 2, figsize=(10, 4))

    # Left panel: time-domain waveform.
    librosa.display.waveshow(y=vowel_signal, sr=sample_rate, ax=wave_ax)
    wave_ax.set_title('Vowel Signal')

    # Right panel: log-frequency spectrogram. The STFT is complex, so take the
    # magnitude explicitly before converting to dB.
    magnitude_db = librosa.amplitude_to_db(np.abs(librosa.stft(vowel_signal)))
    image = librosa.display.specshow(
        magnitude_db, sr=sample_rate, x_axis='time', y_axis='log', ax=spec_ax
    )
    spec_ax.set_title('Spectrogram')
    fig.colorbar(image, ax=spec_ax, format='%+2.0f dB')

    # Formants are frequencies, so they belong on the y (frequency) axis.
    for formant in formants:
        if np.isfinite(formant):
            spec_ax.axhline(y=formant, color='red')

    fig.tight_layout()
    plt.show()

# Example usage: load one recorded vowel (sr=None is passed so librosa does not
# resample it) and print what find_formants reports for it.
vowel_path = '/Users/lequangnhat/My Study/4-Signal-Processing/test/NguyenAmKiemThu-16k/01MDA/a.wav'
vowel_signal, sample_rate = librosa.load(vowel_path, sr=None)
formants = find_formants(vowel_signal, sample_rate)
print(formants)
# Uncomment to also draw the waveform and spectrogram:
# plot_signal_with_formants(vowel_signal, sample_rate, formants)

[26, 8, 6]


In [27]:
import parselmouth

def estimate_formants(file_path, max_formant=5500, num_formants=3):
    """Estimate the first formants (F1, F2, ...) of a vowel at its temporal midpoint.

    The file is analysed with Praat's Burg formant tracker (via parselmouth)
    and the formant track is sampled at half the sound's duration, on the
    assumption that the vowel is steady there.

    Parameters:
        file_path (str): Path to the WAV file containing the vowel sound.
        max_formant (float): Formant ceiling in Hz passed to the Burg
            analysis. The Praat manual suggests 5500 Hz for an average adult
            female speaker and about 5000 Hz for an average adult male.
        num_formants (int): Number of formants to return, starting at F1
            (default 3).

    Returns:
        tuple: ``num_formants`` formant frequencies in Hz, ordered F1, F2, ...
        NOTE(review): a value is presumably ``nan`` when Praat reports that
        formant as undefined at the midpoint -- confirm against parselmouth.
    """
    sound = parselmouth.Sound(file_path)
    formant_track = sound.to_formant_burg(maximum_formant=max_formant)

    # Sample the track in the middle of the recording, away from onset/offset.
    midpoint = sound.duration / 2

    # Praat numbers formants from 1, hence the 1-based range.
    return tuple(
        formant_track.get_value_at_time(formant_number, midpoint)
        for formant_number in range(1, num_formants + 1)
    )

# Example usage: point `vowel_file` at your own vowel recording.
vowel_file = '/Users/lequangnhat/My Study/4-Signal-Processing/test/NguyenAmHuanLuyen-16k/34MQP/e.wav'
f1, f2, f3 = estimate_formants(vowel_file)
print(f"F1: {f1:.2f} Hz, F2: {f2:.2f} Hz, F3: {f3:.2f} Hz")

F1: 842.64 Hz, F2: 1824.97 Hz, F3: 2684.95 Hz
