# Download & import packages

In [1]:
pip install --upgrade pip

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [2]:
!pip install librosa
!pip install h5py
!pip install tensorflow

Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable




In [3]:
import os
import librosa
import numpy as np
import h5py
import tensorflow as tf
import csv
from concurrent.futures import ProcessPoolExecutor
import io 
import soundfile as sf
import pickle


import warnings
warnings.filterwarnings('ignore')

2024-04-24 16:30:54.608048: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-04-24 16:30:54.608492: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-04-24 16:30:54.611087: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-04-24 16:30:54.647124: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:
print("Current working directory:", os.getcwd())

Current working directory: /projectnb/ba865/projects/Group3_A1


# Preprocess

In [5]:
# this function loads audio files from the file path
def process_audio(file_path):
    try:
        # Limit sampling rate to 4800
        audio, sr = librosa.load(file_path, sr=4800)
        audio = librosa.util.normalize(audio)
        # trim leading and trailing silence
        audio_trimmed, _ = librosa.effects.trim(audio, top_db=30, frame_length=2048, hop_length=512)
        # extract MFCCs and labels
        mfccs = librosa.feature.mfcc(y=audio_trimmed, sr=sr/10, n_mfcc=13)
        label = os.path.basename(os.path.dirname(os.path.dirname(file_path)))
        return {'mfccs': mfccs, 'label': label}
    # alert us if something is wrong so we can debug
    except Exception as e:
        print(f"Failed to process {file_path}: {str(e)}")
        return None

# this function utilizes CPU to parallel process data
def process_audio_parallel(file_paths):
    with ProcessPoolExecutor() as executor:
        results = list(executor.map(process_audio, file_paths))
    return results

# this function goes through all 9 languages, one at a time, processes every MP3, and output Pickle file containing the features and labels
def batch_audio_processor(base_path):
    # goes into directories and reach MP3 files' location
    for language in os.listdir(base_path):
        lang_path = os.path.join(base_path, language, "clips")
        if os.path.isdir(lang_path):
            file_paths = [os.path.join(lang_path, file) for file in os.listdir(lang_path) if file.endswith('.mp3')]
            results = process_audio_parallel(file_paths)
            results = [result for result in results if result is not None]

            # stores processed data in Pickle
            output_file = os.path.join(base_path, f"{language}_mfccs_and_labels.pkl")
            with open(output_file, 'wb') as f:
                pickle.dump(results, f)
            # tracks preprocessing progress
            print(f"Processed and saved data for language: {language} using pickle")

base_path = '/projectnb/ba865/projects/Group3_A1/dataset'
batch_audio_processor(base_path)

Processed and saved data for language: english using pickle
Processed and saved data for language: french using pickle
Processed and saved data for language: chinese using pickle
