# Audio Feature Extraction with Essentia

In [None]:
# Install the packages this notebook needs via pip shell commands.
# NumPy is pinned to exactly 1.24.3 — presumably for compatibility with the
# Essentia build that gets installed next; TODO confirm the pin is still needed.
!pip install numpy==1.24.3
# Essentia supplies the MusicExtractor algorithm used by the extraction cells.
!pip install essentia

1. Load the audio files, for example from Google Drive or from any other directory.

In [None]:
from google.colab import drive
import os, sys
# Folder that holds the audio files (placeholder path: replace "..." with the real folder).
path = '/content/drive/My Drive/...'

# Make Google Drive visible under /content/drive, then switch the working
# directory to the audio folder so files can be opened by their bare names.
drive.mount('/content/drive')
os.chdir(path)

# Last expression of the cell: the notebook displays the folder contents.
os.listdir(path)

2. Create a list of the MP3 files

In [None]:
# Collect the names of the MP3 files located directly inside `path`.
# Only bare file names are stored (no directory component), so they resolve
# correctly only while the working directory is `path`.
file_names = os.listdir(path)
print(len(file_names))

# The extension is compared case-insensitively so that files such as
# "track.MP3" are not silently skipped. os.listdir() returns entries in
# arbitrary order, so the result is sorted to keep the processing order (and
# therefore the row order of the exported tables) reproducible between runs.
mp3_files = sorted(name for name in file_names if name.lower().endswith('.mp3'))
print(len(mp3_files))
print(mp3_files)

3. Extract audio features for all mp3 files

In [None]:
import essentia
import essentia.standard as es
import pandas as pd
from tqdm import tqdm


# Output column name -> descriptor key in the pool returned by MusicExtractor.
# The insertion order of this mapping defines the column order of the table.
FEATURE_KEYS = {
    # metadata
    'file_name': 'metadata.tags.file_name',
    'length(s)': 'metadata.audio_properties.length',
    # rhythm
    'bpm': 'rhythm.bpm',
    'beats_count': 'rhythm.beats_count',
    'beats_loudness': 'rhythm.beats_loudness.mean',
    'danceability': 'rhythm.danceability',
    'onset_rate': 'rhythm.onset_rate',
    # tonal
    'chords_changes_rate': 'tonal.chords_changes_rate',
    'chords_number_rate': 'tonal.chords_number_rate',
    'chords_strength': 'tonal.chords_strength.mean',
    'hpcp_crest': 'tonal.hpcp_crest.mean',
    'hpcp_entropy': 'tonal.hpcp_entropy.mean',
    'tuning_diatonic_strength': 'tonal.tuning_diatonic_strength',
    'tuning_equal_tempered_deviation': 'tonal.tuning_equal_tempered_deviation',
    'tuning_frequency': 'tonal.tuning_frequency',
    'tuning_nontempered_energy_ratio': 'tonal.tuning_nontempered_energy_ratio',
    'chords_key': 'tonal.chords_key',
    'chords_scale': 'tonal.chords_scale',
    'key_edma_strength': 'tonal.key_edma.strength',
    'key_edma_key': 'tonal.key_edma.key',
    'key_edma_scale': 'tonal.key_edma.scale',
    'key_krumhansl_strength': 'tonal.key_krumhansl.strength',
    'key_krumhansl_key': 'tonal.key_krumhansl.key',
    'key_krumhansl_scale': 'tonal.key_krumhansl.scale',
    'key_temperley_strength': 'tonal.key_temperley.strength',
    'key_temperley_key': 'tonal.key_temperley.key',
    'key_temperley_scale': 'tonal.key_temperley.scale',
    # low-level
    'average_loudness': 'lowlevel.average_loudness',
    'barkbands_crest': 'lowlevel.barkbands_crest.mean',
    'barkbands_flatness_db': 'lowlevel.barkbands_flatness_db.mean',
    'barkbands_kurtosis': 'lowlevel.barkbands_kurtosis.mean',
    'barkbands_skewness': 'lowlevel.barkbands_skewness.mean',
    'barkbands_spread': 'lowlevel.barkbands_spread.mean',
    'dissonance': 'lowlevel.dissonance.mean',
    'dynamic_complexity': 'lowlevel.dynamic_complexity',
    'erbbands_crest': 'lowlevel.erbbands_crest.mean',
    'erbbands_flatness_db': 'lowlevel.erbbands_flatness_db.mean',
    'erbbands_kurtosis': 'lowlevel.erbbands_kurtosis.mean',
    'erbbands_skewness': 'lowlevel.erbbands_skewness.mean',
    'erbbands_spread': 'lowlevel.erbbands_spread.mean',
    'hfc': 'lowlevel.hfc.mean',
    'loudness_ebu128_int': 'lowlevel.loudness_ebu128.integrated',
    'loudness_ebu128_range': 'lowlevel.loudness_ebu128.loudness_range',
    'melbands_crest': 'lowlevel.melbands_crest.mean',
    'melbands_flatness_db': 'lowlevel.melbands_flatness_db.mean',
    'melbands_kurtosis': 'lowlevel.melbands_kurtosis.mean',
    'melbands_skewness': 'lowlevel.melbands_skewness.mean',
    'melbands_spread': 'lowlevel.melbands_spread.mean',
    'pitch_salience': 'lowlevel.pitch_salience.mean',
    'spectral_centroid': 'lowlevel.spectral_centroid.mean',
    'spectral_complexity': 'lowlevel.spectral_complexity.mean',
    'spectral_decrease': 'lowlevel.spectral_decrease.mean',
    'spectral_energy': 'lowlevel.spectral_energy.mean',
    'spectral_energyband_high': 'lowlevel.spectral_energyband_high.mean',
    'spectral_energyband_low': 'lowlevel.spectral_energyband_low.mean',
    'spectral_energyband_middle_high': 'lowlevel.spectral_energyband_middle_high.mean',
    'spectral_energyband_middle_low': 'lowlevel.spectral_energyband_middle_low.mean',
    'spectral_entropy': 'lowlevel.spectral_entropy.mean',
    'spectral_flux': 'lowlevel.spectral_flux.mean',
    'spectral_kurtosis': 'lowlevel.spectral_kurtosis.mean',
    'spectral_rms': 'lowlevel.spectral_rms.mean',
    'spectral_rolloff': 'lowlevel.spectral_rolloff.mean',
    'spectral_skewness': 'lowlevel.spectral_skewness.mean',
    'spectral_spread': 'lowlevel.spectral_spread.mean',
    'spectral_strongpeak': 'lowlevel.spectral_strongpeak.mean',
    'zerocrossingrate': 'lowlevel.zerocrossingrate.mean',
}

# The extractor is configured identically for every file, so it is built once
# and then called per file instead of being re-created on each iteration.
extractor = es.MusicExtractor(lowlevelStats=['mean', 'stdev'],
                              rhythmStats=['mean', 'stdev'],
                              tonalStats=['mean', 'stdev'])

# One dictionary of features per successfully processed file
all_features = []

# Loop over each file to compute features
for audiofile in tqdm(mp3_files, desc='Processing Audio Files'):
    try:
        features, features_frames = extractor(audiofile)
        # Pull every selected descriptor out of the result pool in one pass.
        feature_dict = {column: features[key] for column, key in FEATURE_KEYS.items()}
    except Exception as e:
        # Batch boundary: a file that cannot be analysed (or lacks one of the
        # descriptors) is reported and skipped, so the features already
        # extracted from the other files are not lost.
        print(f"❌ Error processing {audiofile}: {e}")
        continue

    all_features.append(feature_dict)

# Create DataFrame from the list of dictionaries. Passing the column names
# explicitly keeps the header in place even when no file could be processed.
df = pd.DataFrame(all_features, columns=list(FEATURE_KEYS))

4. Save the extracted features to a CSV file.

In [None]:
# Write the feature table to a CSV file; the relative name resolves against
# the current working directory. to_csv() is called with its defaults, so the
# DataFrame's row index is written to the file as well.
features_csv = '25audio_features.csv'
df.to_csv(features_csv)

### Additional features
- mfcc and gfcc coefficients
- spectral contrast
- beats loudness band ratio
- chords histogram
- silence rate

In [None]:
import numpy as np
import pandas as pd
from tqdm import tqdm
import essentia
import essentia.standard as es

# Vector-valued descriptors: (column prefix, pool key, expected number of values).
# Each one is flattened into columns named "<prefix>_<index>". The expected
# sizes are the ones the output table has always been laid out for.
ARRAY_FEATURES = [
    ('mfcc', 'lowlevel.mfcc.mean', 13),
    ('gfcc', 'lowlevel.gfcc.mean', 13),
    ('spectral_contrast_coeffs', 'lowlevel.spectral_contrast_coeffs.mean', 6),
    ('spectral_contrast_valleys', 'lowlevel.spectral_contrast_valleys.mean', 6),
    ('beats_loudness_band_ratio', 'rhythm.beats_loudness_band_ratio.mean', 6),
    ('chords_histogram', 'tonal.chords_histogram', 24),
]

# Single-valued descriptors: (column name, pool key).
SCALAR_FEATURES = [
    ('silence_rate_20dB', 'lowlevel.silence_rate_20dB.mean'),
    ('silence_rate_30dB', 'lowlevel.silence_rate_30dB.mean'),
    ('silence_rate_60dB', 'lowlevel.silence_rate_60dB.mean'),
]

# Column names are derived from the specs above so they cannot drift out of
# sync with the row layout: vector columns, then scalar columns, then the
# name of the source file.
columns = [f'{prefix}_{i}' for prefix, _key, size in ARRAY_FEATURES for i in range(size)]
columns += [name for name, _key in SCALAR_FEATURES]
columns.append('filename')

# The extractor is configured identically for every file, so build it once.
extractor = es.MusicExtractor(lowlevelStats=['mean', 'stdev'],
                              rhythmStats=['mean', 'stdev'],
                              tonalStats=['mean', 'stdev'])

# Store all rows here (one list per successfully processed file)
data = []

# Loop through all files with progress tracking
for audiofile in tqdm(mp3_files, desc="Processing audio files"):
    try:
        features, features_frames = extractor(audiofile)

        row = []
        for prefix, key, size in ARRAY_FEATURES:
            values = list(features[key])
            # Check the length per file: a row of the wrong width would
            # otherwise only fail when the DataFrame is built, after every
            # file has been processed, and take all other rows down with it.
            if len(values) != size:
                raise ValueError(f"{key} has {len(values)} values, expected {size}")
            row.extend(values)

        row.extend(features[key] for _name, key in SCALAR_FEATURES)
        row.append(audiofile)
    except Exception as e:
        # Report the failing file and carry on with the remaining ones.
        print(f"❌ Error processing {audiofile}: {e}")
        continue

    data.append(row)

# Create DataFrame and save (without the row index)
df = pd.DataFrame(data, columns=columns)
df.to_csv("new_audio_features.csv", index=False)
print("✅ Features saved to new_audio_features.csv")
