In [2]:
!pip install librosa numpy matplotlib

Collecting librosa
  Downloading librosa-0.10.2.post1-py3-none-any.whl.metadata (8.6 kB)
Collecting numpy
  Using cached numpy-2.2.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (62 kB)
Collecting matplotlib
  Using cached matplotlib-3.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Collecting audioread>=2.1.9 (from librosa)
  Downloading audioread-3.0.1-py3-none-any.whl.metadata (8.4 kB)
Collecting scipy>=1.2.0 (from librosa)
  Using cached scipy-1.15.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
Collecting scikit-learn>=0.20.0 (from librosa)
  Using cached scikit_learn-1.6.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (18 kB)
Collecting joblib>=0.14 (from librosa)
  Using cached joblib-1.4.2-py3-none-any.whl.metadata (5.4 kB)
Collecting numba>=0.51.0 (from librosa)
  Downloading numba-0.61.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (2.8 kB)
Collec

In [28]:
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
import os

In [45]:
def extract_features(file_path, mp3_file, sr=22050, n_mfcc=13, output_dir="features"):
    """Compute five time-frequency features for one audio file and save each as a PNG.

    For every feature an image is written to
    ``<output_dir>/<Feature_Name>/<base>-<Feature_Name>.png``, where ``<base>``
    is the file name of ``file_path`` without its extension and
    ``<Feature_Name>`` is the feature's dictionary key with spaces replaced by
    underscores.

    Parameters
    ----------
    file_path : str
        Path of the audio file to load.
    mp3_file : str
        Unused. Retained so existing callers that pass the bare file name keep
        working; the output name is derived from ``file_path``.
    sr : int, optional
        Sample rate passed to ``librosa.load``.
    n_mfcc : int, optional
        Number of coefficients requested for the 'MFCC' and 'PLP' features.
    output_dir : str, optional
        Root directory for the generated images; created if missing.

    Returns
    -------
    dict
        Maps feature name to the array returned by librosa, with keys
        'Mel Spectrogram', 'MFCC', 'CQT', 'Chromagram' and 'PLP'.

    Notes
    -----
    The keys 'CQT' and 'PLP' are kept for backward compatibility with existing
    output folders and callers, but they are looser than their names suggest:
    'CQT' holds the chroma representation from ``chroma_cqt`` (not a raw
    constant-Q transform), and 'PLP' holds MFCCs computed with ``htk=True`` as
    a stand-in (no separate PLP analysis is performed).
    """
    # Load the audio file at the requested sample rate.
    y, sr = librosa.load(file_path, sr=sr)

    # 1. Mel spectrogram, converted from power to dB relative to its peak.
    mel_spectrogram = librosa.feature.melspectrogram(y=y, sr=sr)
    mel_spectrogram_db = librosa.power_to_db(mel_spectrogram, ref=np.max)

    # 2. MFCC (Mel-Frequency Cepstral Coefficients).
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)

    # 3. Chroma computed from a constant-Q transform (stored under 'CQT').
    cqt = librosa.feature.chroma_cqt(y=y, sr=sr)

    # 4. Chromagram computed from the STFT.
    chroma = librosa.feature.chroma_stft(y=y, sr=sr)

    # 5. "PLP" stand-in: MFCCs with htk=True.
    plp = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc, htk=True)

    # Strip only the real extension; str.replace('.mp3', '') would also remove
    # '.mp3' from the middle of a name and would miss '.MP3' or other formats.
    base_filename = os.path.splitext(os.path.basename(file_path))[0]

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(output_dir, exist_ok=True)

    # Feature name -> data; the names double as plot titles and folder names.
    features = {
        'Mel Spectrogram': mel_spectrogram_db,
        'MFCC': mfcc,
        'CQT': cqt,
        'Chromagram': chroma,
        'PLP': plp
    }

    # Only these features are expressed in decibels, so only they get a
    # "dB" colorbar label; the rest use matplotlib's default tick format.
    db_scaled = {'Mel Spectrogram'}

    # Plot and save each feature as an image in its own per-feature folder.
    for feature_name, feature_data in features.items():
        folder_name = feature_name.replace(' ', '_')
        feature_folder = os.path.join(output_dir, folder_name)
        os.makedirs(feature_folder, exist_ok=True)

        fig, ax = plt.subplots(figsize=(10, 4))
        try:
            img = librosa.display.specshow(feature_data, x_axis='time', sr=sr, ax=ax)
            colorbar_format = '%+2.0f dB' if feature_name in db_scaled else None
            fig.colorbar(img, ax=ax, format=colorbar_format)
            ax.set_title(f"{feature_name}")
            fig.tight_layout()

            output_path = os.path.join(feature_folder, f"{base_filename}-{folder_name}.png")
            fig.savefig(output_path)
        finally:
            # Always release the figure, even if plotting or saving fails, so a
            # long batch run does not accumulate open figures.
            plt.close(fig)

    return features


In [47]:
# Batch driver: run feature extraction on every entry in the dataset folder
# whose name ends with ".mp3", writing the images under "features".
folder_path = "dataset/"
mp3_files = list(
    filter(lambda name: name.endswith(".mp3"), os.listdir(folder_path))
)

for mp3_file in mp3_files:
    # Build the full path once; it is used for both the log line and the call.
    file_path = os.path.join(folder_path, mp3_file)
    print(f"Processing {file_path}...")
    extract_features(file_path, mp3_file, output_dir="features")

Processing dataset/bananas_from_seoul.mp3...
Processing dataset/DeclanMcKenna–Elephant-Demo.mp3...
