In [66]:
import os

import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm

from utils import get_word_by_id, get_id_by_word

In [67]:
def plot_spectrogram(y, sr, n_fft: int = 2048, hop_length: int = 512, file: str = '', save: bool = False):
    """Draw a dB-scaled spectrogram of an audio signal and show or save it.

    The magnitude of the short-time Fourier transform of ``y`` is converted to
    decibels relative to its own maximum and rendered with a time x-axis and a
    log-scaled frequency y-axis.

    Args:
        y: Audio samples; passed unchanged to ``librosa.stft``.
        sr: Sampling rate of ``y``; forwarded to ``specshow`` for axis scaling.
        n_fft: FFT window size used for the STFT.
        hop_length: Hop between STFT frames, used for both the transform and
            the time axis.
        file: Label appended to the plot title. When ``save`` is true it is
            also used verbatim as the PNG stem, so ``'a.wav'`` is written as
            ``'a.wav.png'``.
        save: If true, write the figure to ``temp/spectogram/<file>.png``
            (creating the folder if needed) and close it. Otherwise display
            it with ``plt.show()``.
    """
    magnitude = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_length))
    magnitude_db = librosa.amplitude_to_db(magnitude, ref=np.max)

    # Use a dedicated figure/axes instead of pyplot's implicit "current figure"
    # so nothing left over from an earlier plot can bleed into this one.
    fig, ax = plt.subplots()
    mesh = librosa.display.specshow(
        magnitude_db, sr=sr, hop_length=hop_length, x_axis='time', y_axis='log', ax=ax
    )
    fig.colorbar(mesh, ax=ax, format='%+2.0f dB')
    ax.set_title('Spectrogram of the audio: ' + file)

    if not save:
        plt.show()
        return

    output_path = 'temp/spectogram'
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(output_path, exist_ok=True)
    try:
        fig.savefig(f'{output_path}/{file}.png')
    finally:
        # Always release the figure, even if writing the PNG fails; this
        # function is called in a loop and open figures accumulate in memory.
        plt.close(fig)


In [68]:
# (Disabled) Batch-render and save spectrograms for every .wav file in a single folder, with a progress bar.
# audio_folder = 'datasets/A/'
# wav_files = [file for file in os.listdir(audio_folder) if file.endswith('.wav')]

# for file in tqdm(wav_files, desc="Processing audio files"):
#     y, sr = librosa.load(os.path.join(audio_folder, file))
#     plot_spectrogram(y, sr, file=file, save=True)

In [69]:
def plot_spectrogram_for_word(word: str, n_fft: int = 2048, hop_length: int = 512):
    """Save a spectrogram for every matching ``.wav`` file under ``datasets/``.

    Each immediate subfolder of ``datasets`` is scanned for files ending in
    ``.wav``. Files whose base name contains ``word`` are loaded with
    ``librosa.load`` (default arguments) and passed to ``plot_spectrogram``
    with ``save=True``.

    Args:
        word: Substring to look for in each file's base name.
        n_fft: FFT window size forwarded to ``plot_spectrogram``.
        hop_length: STFT hop length forwarded to ``plot_spectrogram``.

    Raises:
        FileNotFoundError: If the ``datasets`` folder does not exist
            (raised by ``os.listdir``).
    """
    audio_folder = 'datasets'

    # os.listdir returns entries in arbitrary order; sort so runs are reproducible.
    wav_files = []
    for entry in sorted(os.listdir(audio_folder)):
        sub_folder = os.path.join(audio_folder, entry)
        if not os.path.isdir(sub_folder):
            continue  # ignore loose files sitting directly in 'datasets'
        wav_files.extend(
            os.path.join(sub_folder, name)
            for name in sorted(os.listdir(sub_folder))
            if name.endswith('.wav')
        )

    # NOTE(review): this is a plain substring test, so an id such as '35' also
    # matches any file name that merely contains '35' — confirm this is safe
    # for the dataset's naming scheme.
    word_files = [path for path in wav_files if word in os.path.basename(path)]

    # NOTE(review): plot_spectrogram names its PNG after the base name only, so
    # same-named files from different subfolders overwrite each other.
    for path in tqdm(word_files, desc=f"Processing audio files for word '{word}'"):
        y, sr = librosa.load(path)
        # Forward the STFT settings; previously they were accepted but ignored.
        plot_spectrogram(
            y,
            sr,
            n_fft=n_fft,
            hop_length=hop_length,
            file=os.path.basename(path),
            save=True,
        )
    

In [70]:
# Word whose recordings should be visualised; it is mapped to a numeric id
# with get_id_by_word before the files are searched.
word: str = 'khong'

In [71]:
# Convert the word to its id, left-pad it with zeros to at least two
# characters, and render spectrograms for every file whose name contains it.
word_id = str(get_id_by_word(word)).zfill(2)
plot_spectrogram_for_word(word_id)

Processing audio files for word '35': 100%|██████████| 10/10 [00:02<00:00,  4.44it/s]
