In [None]:
# Install the Essentia audio-analysis package from PyPI; the feature-extraction
# cell below imports it as `essentia` / `essentia.standard`.
!pip install essentia



In [None]:
# Pin NumPy to 1.23.0.
# NOTE(review): the recorded output of this cell shows pip reporting that
# several preinstalled packages (e.g. opencv-contrib-python, chex, pymc,
# plotnine) require a newer NumPy, so this pin leaves dependency conflicts.
!pip install numpy==1.23.0

Collecting numpy==1.23.0
  Using cached numpy-1.23.0-cp311-cp311-linux_x86_64.whl
Installing collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 2.2.4
    Uninstalling numpy-2.2.4:
      Successfully uninstalled numpy-2.2.4
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
opencv-contrib-python 4.11.0.86 requires numpy>=1.23.5; python_version >= "3.11", but you have numpy 1.23.0 which is incompatible.
chex 0.1.89 requires numpy>=1.24.1, but you have numpy 1.23.0 which is incompatible.
pymc 5.21.1 requires numpy>=1.25.0, but you have numpy 1.23.0 which is incompatible.
plotnine 0.14.5 requires numpy>=1.23.5, but you have numpy 1.23.0 which is incompatible.
pandas-stubs 2.2.2.240909 requires numpy>=1.23.5, but you have numpy 1.23.0 which is incompatible.
astropy 7.0.1 requires numpy>=1.23.2, but you have numpy 1.23.0 

In [None]:
# Install pydub; the Essentia feature-extraction cell imports its AudioSegment
# class to downmix WAV files to a single channel.
!pip install pydub



In [None]:
# Remove Essentia, install it again, install FFmpeg development libraries via
# apt, then force a verbose, cache-free reinstall of Essentia.
# NOTE(review): Essentia is installed twice in this cell (plain install, then
# --force-reinstall); presumably only the last install is needed — confirm.
!pip uninstall essentia -y
!pip install essentia
!apt-get update
!apt-get install -y libavcodec-dev libavformat-dev libswscale-dev
!pip install -v --no-cache-dir --force-reinstall essentia

Found existing installation: essentia 2.1b6.dev1110
Uninstalling essentia-2.1b6.dev1110:
  Successfully uninstalled essentia-2.1b6.dev1110
Collecting essentia
  Using cached essentia-2.1b6.dev1110-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.9 kB)
Using cached essentia-2.1b6.dev1110-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.7 MB)
Installing collected packages: essentia
Successfully installed essentia-2.1b6.dev1110
Get:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,632 B]
Get:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease [1,581 B]
Get:3 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
Get:4 https://r2u.stat.illinois.edu/ubuntu jammy InRelease [6,555 B]
Hit:5 http://archive.ubuntu.com/ubuntu jammy InRelease
Get:6 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  Packages [1,381 kB]
Get:7 http://archive.ubuntu.com/ubuntu jammy-upd

In [None]:
import essentia
# Sanity check: confirm the import works and show which Essentia build is loaded.
print("Essentia version:", essentia.__version__)

Essentia version: 2.1-beta6-dev


In [None]:
import os
import essentia
import essentia.standard as es
import numpy as np
import concurrent.futures  # For parallel processing
import time
from pydub import AudioSegment

# Probe for GPU support in the installed Essentia build.
# Only a failed import means "no CUDA": catching ImportError (rather than a bare
# `except:`) lets KeyboardInterrupt and unrelated errors propagate instead of
# being silently misreported as missing CUDA support.
try:
    from essentia import cuda
    use_cuda = True
except ImportError:
    print("CUDA not supported in your essenta build.")
    use_cuda = False  # Fall back to CPU-only processing

def extract_audio_features(audio_file, use_cuda=False):
    """
    Extracts audio features from a single audio file using Essentia.

    The file is first downmixed to one channel with pydub and written to a
    private temporary WAV, which Essentia's MonoLoader then reads at 44.1 kHz.
    The temporary file is unique per call (this function is run from several
    threads at once) and is always removed, even when loading fails.

    Features are computed with individual Essentia algorithms because
    ``essentia.standard`` has no ``LowLevelDescriptors`` algorithm (the
    previous implementation failed on every file with that AttributeError).

    Args:
        audio_file (str): Path to the audio file (.wav).
        use_cuda (bool): Accepted for backward compatibility and ignored.
            The earlier GPU branch relied on ``essentia.cuda`` and an undefined
            ``audio_cuda`` name, so it could never run; all processing is CPU.

    Returns:
        dict: A dictionary with the keys ``rms`` and ``zero_crossing_rate``
        (computed over the whole signal), ``spectral_centroid`` (mean over
        frames; in Hz per the Essentia Centroid docs when ``range`` is half the
        sample rate), ``mean_mfccs`` and ``std_mfccs`` (per-coefficient
        statistics over frames); or None if an error occurs.
    """
    import tempfile  # local import: only needed for the per-call temp file

    sample_rate = 44100
    frame_size = 2048  # yields 1025 spectrum bins, MFCC's default input size
    hop_size = 512

    mono_audio_file = None
    try:
        # Downmix with pydub before handing the file to Essentia.
        sound = AudioSegment.from_wav(audio_file).set_channels(1)

        # A fixed name such as "temp_mono.wav" would be overwritten/deleted by
        # other worker threads mid-read, so reserve a unique path per call.
        fd, mono_audio_file = tempfile.mkstemp(suffix=".wav")
        os.close(fd)
        sound.export(mono_audio_file, format="wav")

        audio = es.MonoLoader(filename=mono_audio_file, sampleRate=sample_rate)()
        if audio.size == 0:
            raise ValueError("audio contains no samples")

        # 1. Signal-level descriptors over the whole recording.
        rms_value = es.RMS()(audio)
        zcr_value = es.ZeroCrossingRate()(audio)

        # 2. Frame-wise spectral descriptors (spectral centroid and MFCCs).
        windowing = es.Windowing(type='hann')
        spectrum = es.Spectrum()
        mfcc = es.MFCC()
        centroid = es.Centroid(range=sample_rate / 2.0)

        mfccs_all_frames = []
        centroids_all_frames = []
        for frame in es.FrameGenerator(audio, frameSize=frame_size, hopSize=hop_size):
            magnitude = spectrum(windowing(frame))
            _mfcc_bands, mfcc_coeffs = mfcc(magnitude)
            mfccs_all_frames.append(mfcc_coeffs)
            centroids_all_frames.append(centroid(magnitude))

        if not mfccs_all_frames:
            raise ValueError("audio produced no analysis frames")

        # Shape is (num_frames, num_mfccs)
        mfccs_all_frames = np.array(mfccs_all_frames)

        return {
            "rms": rms_value,
            "spectral_centroid": float(np.mean(centroids_all_frames)),
            "zero_crossing_rate": zcr_value,
            "mean_mfccs": np.mean(mfccs_all_frames, axis=0),  # Averaged MFCCs
            "std_mfccs": np.std(mfccs_all_frames, axis=0),  # MFCC standard deviation
            # Add more features here as needed
        }

    except Exception as e:
        print(f"Error processing audio file {audio_file}: {e}")
        return None

    finally:
        # Always clean up the temporary mono file, including on failure.
        if mono_audio_file is not None and os.path.exists(mono_audio_file):
            os.remove(mono_audio_file)

def process_audio_file(audio_file, output_folder, use_cuda):
    """Extract features for one audio file and write them to a text report.

    The report goes to ``<output_folder>/<input stem>_features.txt`` with one
    ``key: value`` line per extracted feature. Nothing is written when the
    extractor returns a falsy result, and any exception raised along the way
    is caught and printed rather than propagated.
    """
    stem = os.path.splitext(os.path.basename(audio_file))[0]
    output_txt_file = os.path.join(output_folder, f"{stem}_features.txt")
    print(f"Processing audio: {audio_file}")

    try:
        features = extract_audio_features(audio_file, use_cuda)

        if not features:
            print("No features extracted.")
            return

        with open(output_txt_file, "w") as f:
            f.writelines(f"{key}: {value}\n" for key, value in features.items())
        print(f"Features saved to {output_txt_file}")

    except Exception as e:
        print(f"Error in process_audio_file: {e}")

def traverse_directory(root_directory, output_root_folder, max_workers=4, use_cuda=False):
    """
    Walks a directory tree and runs feature extraction on every .wav file found.

    For each folder that contains at least one .wav file (case-insensitive
    match), the same relative folder is created under ``output_root_folder``.
    The files are then handed to a thread pool, one task per file.

    Args:
        root_directory (string): the directory to start walking from
        output_root_folder (string): root of the mirrored output tree
        max_workers (int): size of the thread pool
        use_cuda (bool): forwarded unchanged to ``process_audio_file``
    """
    jobs = []  # (input wav path, mirrored output folder) pairs
    for current_dir, _subdirs, names in os.walk(root_directory):
        wav_names = [name for name in names if name.lower().endswith('.wav')]
        if not wav_names:
            continue
        mirrored_dir = os.path.join(
            output_root_folder, os.path.relpath(current_dir, root_directory)
        )
        os.makedirs(mirrored_dir, exist_ok=True)
        jobs.extend((os.path.join(current_dir, name), mirrored_dir) for name in wav_names)

    def _run(job):
        source_path, target_dir = job
        return process_audio_file(source_path, target_dir, use_cuda)

    # Leaving the `with` block waits for every submitted task to finish.
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        executor.map(_run, jobs)

# Example usage:

if __name__ == '__main__':
    # Source tree of .wav files and destination for the feature reports;
    # replace both with your own directories.
    input_directory = "/content/drive/MyDrive/Datasets/preprocess"
    output_directory = "/content/drive/MyDrive/AudioFeatures/"

    # Time the whole batch so the worker count can be tuned.
    start_time = time.time()
    traverse_directory(
        input_directory,
        output_directory,
        max_workers=4,
        use_cuda=use_cuda,  # whatever the CUDA import probe decided
    )
    end_time = time.time()
    print("Time is", end_time-start_time, "seconds")

CUDA not supported in your essenta build.
Error processing audio file /content/drive/MyDrive/Datasets/preprocess/God.Bless.America.2011__#01-21-50_01-22-35_label_B1-0-0_audio.wav: module 'essentia.standard' has no attribute 'LowLevelDescriptors'
No features extracted.
Processing audio: /content/drive/MyDrive/Datasets/preprocess/GoldenEye.1995__#00-05-41_00-07-32_label_B2-G-0_audio.wav
Error processing audio file /content/drive/MyDrive/Datasets/preprocess/God.Bless.America.2011__#01-15-20_01-16-40_label_A_audio.wav: module 'essentia.standard' has no attribute 'LowLevelDescriptors'
No features extracted.
Processing audio: /content/drive/MyDrive/Datasets/preprocess/God.Bless.America.2011__#01-38-25_01-39-20_label_B2-0-0_audio.wav
Error processing audio file /content/drive/MyDrive/Datasets/preprocess/God.Bless.America.2011__#01-13-20_01-15-05_label_B2-B6-0_audio.wav: module 'essentia.standard' has no attribute 'LowLevelDescriptors'
No features extracted.
Processing audio: /content/drive/My

KeyboardInterrupt: 

In [None]:
import essentia.standard as es

# Diagnostic: dump every name exposed by essentia.standard to inspect what the
# installed build provides (used here after the LowLevelDescriptors error above).
print(dir(es))  # List the attributes of the 'es' module

['AfterMaxToBeforeMaxEnergyRatio', 'AllPass', 'AudioLoader', 'AudioOnsetsMarker', 'AudioWriter', 'AutoCorrelation', 'BFCC', 'BPF', 'BandPass', 'BandReject', 'BarkBands', 'BeatTrackerDegara', 'BeatTrackerMultiFeature', 'Beatogram', 'BeatsLoudness', 'BinaryOperator', 'BinaryOperatorStream', 'BpmHistogram', 'BpmHistogramDescriptors', 'BpmRubato', 'CartesianToPolar', 'CentralMoments', 'Centroid', 'ChordsDescriptors', 'ChordsDetection', 'ChordsDetectionBeats', 'ChromaCrossSimilarity', 'Chromagram', 'Chromaprinter', 'ClickDetector', 'Clipper', 'ConstantQ', 'CoverSongSimilarity', 'Crest', 'CrossCorrelation', 'CrossSimilarityMatrix', 'CubicSpline', 'DCRemoval', 'DCT', 'Danceability', 'Decrease', 'Derivative', 'DerivativeSFX', 'DiscontinuityDetector', 'Dissonance', 'DistributionShape', 'Duration', 'DynamicComplexity', 'ERBBands', 'EasyLoader', 'EffectiveDuration', 'Energy', 'EnergyBand', 'EnergyBandRatio', 'Entropy', 'Envelope', 'EqloudLoader', 'EqualLoudness', 'Extractor', 'FFT', 'FFTC', 'Fade

In [None]:
# Install a compiler toolchain plus FFTW, aubio, libsamplerate and libsndfile
# development packages — presumably in preparation for building Essentia from
# source; TODO confirm, since no later cell in view performs such a build.
!sudo apt-get install build-essential git cmake libfftw3-dev libaubio-dev libsamplerate0-dev libsndfile1-dev python3-dev python3-pip

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
build-essential is already the newest version (12.9ubuntu3).
cmake is already the newest version (3.22.1-1ubuntu1.22.04.2).
git is already the newest version (1:2.34.1-1ubuntu1.12).
libsndfile1-dev is already the newest version (1.0.31-2ubuntu0.2).
python3-dev is already the newest version (3.10.6-1~22.04.1).
python3-dev set to manually installed.
The following additional packages will be installed:
  libaubio5 libfftw3-bin libfftw3-double3 libfftw3-long3 libfftw3-quad3
  libfftw3-single3 python3-setuptools python3-wheel
Suggested packages:
  libfftw3-doc python-setuptools-doc
The following NEW packages will be installed:
  libaubio-dev libaubio5 libfftw3-bin libfftw3-dev libfftw3-double3
  libfftw3-long3 libfftw3-quad3 libfftw3-single3 libsamplerate0-dev
  python3-pip python3-setuptools python3-wheel
0 upgraded, 12 newly installed, 0 to remove and 35 not upgraded.
Need to get 6,580 kB of a

In [None]:
# NOTE(review): every option below (OPENCV_*, BUILD_opencv_python3, the
# ../opencv source path) configures an OpenCV build, not Essentia. The recorded
# output shows CMake failing because the source directory "/opencv" does not
# exist, so this cell currently has no effect — confirm whether it is needed.
!cmake -D CMAKE_BUILD_TYPE=Release \
       -D CMAKE_INSTALL_PREFIX=/usr/local \
       -D WITH_CUDA=ON \
       -D CUDA_ARCH_BIN="7.5" \
       -D CUDA_ARCH_PTX="" \
       -D WITH_CUDNN=ON \
       -D OPENCV_DNN_CUDA=ON \
       -D ENABLE_FAST_MATH=1 \
       -D CUDA_FAST_MATH=1 \
       -D WITH_TBB=ON \
       -D BUILD_opencv_python3=ON \
       -D OPENCV_GENERATE_PKGCONFIG=ON \
       -D BUILD_EXAMPLES=OFF \
       -D OPENCV_EXTRA_MODULES_PATH=../opencv_contrib/modules \
       -D CUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda \
       ../opencv


Error processing audio file /content/drive/MyDrive/Datasets/preprocess/God.Bless.America.2011__#00-42-23_00-45-12_label_A_audio.wav: module 'essentia.standard' has no attribute 'LowLevelDescriptors'
No features extracted.
Processing audio: /content/drive/MyDrive/Datasets/preprocess/God.Bless.America.2011__#00-54-15_00-56-05_label_A_audio.wav
[0mCMake Error: The source directory "/opencv" does not exist.
Specify --help for usage, or press the help button on the CMake GUI.[0m


In [None]:
import numpy as np
# Diagnostic: show the version of the NumPy module the kernel actually imported.
print("NumPy version:", np.__version__)

NumPy version: 2.0.2


In [None]:
# Upgrade NumPy to the newest available release.
# NOTE(review): this undoes the numpy==1.23.0 pin installed near the top of the
# notebook — confirm which NumPy version the notebook is meant to run against.
!pip install --upgrade numpy



In [None]:
# Reinstall Essentia after making sure NumPy is present.
# NOTE(review): the recorded output shows the same cached wheel
# (2.1b6.dev1110) being installed again, so this repeats the earlier install.
!pip uninstall essentia -y
!pip install numpy  # Ensure NumPy is installed first
!pip install essentia

Found existing installation: essentia 2.1b6.dev1110
Uninstalling essentia-2.1b6.dev1110:
  Successfully uninstalled essentia-2.1b6.dev1110
Collecting essentia
  Using cached essentia-2.1b6.dev1110-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.9 kB)
Using cached essentia-2.1b6.dev1110-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.7 MB)
Installing collected packages: essentia
Successfully installed essentia-2.1b6.dev1110


In [None]:
# Upgrade torch, torchvision and torchaudio; the librosa-based feature
# extraction cell below imports torch.
!pip install --upgrade torch torchvision torchaudio

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [None]:
from google.colab import drive
# Drop any existing Drive mount first, then mount Google Drive at
# /content/drive, the prefix used by the dataset paths in this notebook.
drive.flush_and_unmount()
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import librosa
import librosa.display
import numpy as np
import os
import soundfile as sf  # For writing audio files
import torch  # Import PyTorch
import concurrent.futures

# CUDA Function and Check
def is_cuda_available():
    """Return PyTorch's answer to whether CUDA is currently available."""
    cuda_ready = torch.cuda.is_available()
    return cuda_ready

# Choose the torch device once, up front, and report which one is in use.
device = torch.device("cuda" if is_cuda_available() else "cpu")
print(
    "CUDA is available! Using GPU."
    if device.type == "cuda"
    else "CUDA is not available. Using CPU."
)

def extract_audio_features(audio_path, output_dir, use_cuda=False):
    """
    Extracts 13 MFCCs from a .wav file using Librosa and saves them as a .npy file.

    The audio is loaded with ``librosa.load`` using its defaults, the MFCC
    matrix is computed with ``librosa.feature.mfcc`` and stored as float32 in
    ``<output_dir>/<input stem>_mfccs.npy``.

    The previous version copied the samples into a torch tensor on the GPU and
    straight back to the CPU before and after the Librosa call. Librosa works
    on NumPy arrays, so those transfers changed nothing in the result and have
    been removed.

    Args:
        audio_path: Path to the input .wav file.
        output_dir: Directory to save the extracted features.
        use_cuda: Accepted for backward compatibility; it has no effect because
            the feature computation is done by Librosa on the CPU.

    Returns:
        None. Errors are caught and printed rather than raised.
    """
    try:
        # y: audio time series, sr: sample rate
        y, sr = librosa.load(audio_path)

        # Extract MFCCs (example feature)
        mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)

        # Can perform any other librosa operations

        # Build the output path from the input file's base name.
        name, _extension = os.path.splitext(os.path.basename(audio_path))
        output_file = os.path.join(output_dir, f"{name}_mfccs.npy")

        # Keep the on-disk dtype float32, as the earlier tensor conversion did.
        np.save(output_file, np.asarray(mfccs, dtype=np.float32))

        print(f"Extracted features from {audio_path} and saved to {output_file}")

    except Exception as e:
        print(f"Error processing {audio_path}: {e}")

def process_audio_file(input_file_path, output_folder, use_cuda):
    """Run feature extraction for one file, logging when it starts and ends.

    Thin wrapper used as the unit of work for the thread pool: it prints a
    start line, delegates to ``extract_audio_features`` (which also saves the
    result), and prints a completion line afterwards.
    """
    print(f"Processing: {input_file_path}")
    # Extraction and saving both happen inside the call below.
    extract_audio_features(input_file_path, output_folder, use_cuda)
    print(f"Finished Processing: {input_file_path}")

def traverse_directory(root_directory, output_root_folder, max_workers=4, use_cuda=True):
    """
    Finds every .wav file under a directory and processes them on a thread pool.

    The folder layout below ``root_directory`` is mirrored under
    ``output_root_folder``: an output folder is created for each input folder
    that holds at least one .wav file (extension matched case-insensitively).

    Args:
        root_directory: Path to the directory containing the audio files.
        output_root_folder: Path to the root directory to save the extracted features.
        max_workers: Maximum number of threads to use for parallel processing.
        use_cuda: Passed through unchanged to ``process_audio_file``.
    """
    pending = []  # (input file path, output folder) pairs, in walk order
    for folder, _dirs, files in os.walk(root_directory):
        target_folder = os.path.join(
            output_root_folder, os.path.relpath(folder, root_directory)
        )
        for entry in files:
            if not entry.lower().endswith(".wav"):
                continue
            os.makedirs(target_folder, exist_ok=True)
            pending.append((os.path.join(folder, entry), target_folder))

    def _worker(pair):
        return process_audio_file(pair[0], pair[1], use_cuda)

    # The pool is shut down (and all tasks awaited) when the block exits.
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as pool:
        pool.map(_worker, pending)

if __name__ == "__main__":
    # Directory holding the audio dataset; replace with your own input directory.
    input_directory = "/content/drive/MyDrive/Datasets/preprocess"
    # Root of the mirrored tree that receives the *_mfccs.npy files.
    output_directory = "/content/drive/MyDrive/Datasets/AudioFeatures"
    os.makedirs(output_directory, exist_ok=True)

    # Optional helper for sample testing (left disabled): re-save every input
    # file through librosa/soundfile.
    # audio_files = os.listdir(input_directory)
    # for audio in audio_files:
    #     test_audio, sr = librosa.load(os.path.join(input_directory, audio))
    #     sf.write(os.path.join(input_directory, audio), test_audio, sr) # Save as new file

    # Process the whole dataset with three worker threads.
    traverse_directory(
        input_directory,
        output_directory,
        max_workers=3,
        use_cuda=True,
    )

CUDA is available! Using GPU.
Processing: /content/drive/MyDrive/Datasets/preprocess/Fury.2014__#01-24-39_01-26-24_label_A_audio.wavProcessing: /content/drive/MyDrive/Datasets/preprocess/Fury.2014__#01-42-02_01-47-12_label_B2-G-0_audio.wav

Processing: /content/drive/MyDrive/Datasets/preprocess/Fury.2014__#01-48-19_01-56-29_label_B2-G-0_audio.wav
Extracted features from /content/drive/MyDrive/Datasets/preprocess/Fury.2014__#01-24-39_01-26-24_label_A_audio.wav and saved to /content/drive/MyDrive/Datasets/AudioFeatures/./Fury.2014__#01-24-39_01-26-24_label_A_audio_mfccs.npy
Finished Processing: /content/drive/MyDrive/Datasets/preprocess/Fury.2014__#01-24-39_01-26-24_label_A_audio.wav
Processing: /content/drive/MyDrive/Datasets/preprocess/Fury.2014__#01-58-01_01-59-04_label_B2-G-0_audio.wav
Extracted features from /content/drive/MyDrive/Datasets/preprocess/Fury.2014__#01-58-01_01-59-04_label_B2-G-0_audio.wav and saved to /content/drive/MyDrive/Datasets/AudioFeatures/./Fury.2014__#01-58-01

In [None]:
# Remove and reinstall torch, torchvision and torchaudio.
# NOTE(review): the kernel presumably has to be restarted and the torch-importing
# cell re-run before the reinstalled packages are picked up — confirm.
!pip uninstall -y torch torchvision torchaudio
!pip install torch torchvision torchaudio

Found existing installation: torch 2.6.0+cu124
Uninstalling torch-2.6.0+cu124:
Error processing audio file /content/drive/MyDrive/Datasets/preprocess/Good.Will.Hunting.1997__#01-52-20_01-58-00_label_A_audio.wav: module 'essentia.standard' has no attribute 'LowLevelDescriptors'
No features extracted.
Processing audio: /content/drive/MyDrive/Datasets/preprocess/Haywire.2011__#01-16-30_01-18-01_label_A_audio.wav
Error processing audio file /content/drive/MyDrive/Datasets/preprocess/Haywire.2011__#01-05-21_01-06-37_label_A_audio.wav: module 'essentia.standard' has no attribute 'LowLevelDescriptors'
No features extracted.
Processing audio: /content/drive/MyDrive/Datasets/preprocess/Hear.Me.2009__#00-06-37_00-08-22_label_A_audio.wav
  Successfully uninstalled torch-2.6.0+cu124
Found existing installation: torchvision 0.21.0+cu124
Uninstalling torchvision-0.21.0+cu124:
  Successfully uninstalled torchvision-0.21.0+cu124
Found existing installation: torchaudio 2.6.0+cu124
Uninstalling torchaud