In [1]:
import numpy as np                     # numpy
import pandas as pd                    # pandas
import librosa
import librosa.display
import subprocess

import matplotlib.pyplot as plt
import soundfile as sf

import os

In [44]:
def getFeatureVectorOfAudioFile(filepath) -> np.array([]):
    y, sr = librosa.load(filepath, sr=None)
    
    # Spectral
    rmsStd = np.std(librosa.feature.rms(y=y)) # how much variation in volume/loudness
    spectralContrastMean = np.mean(librosa.feature.spectral_contrast(y=y)) # mean of how much difference is the spectrum (for white noise 0)
    spectralCentroidMean = np.mean(librosa.feature.spectral_centroid(y=y)) # where in the spectrum is the signal the most
    zcrStd = np.std(librosa.feature.zero_crossing_rate(y=y)) # Zero-crossing-rate variation
    zcrMean = np.mean(librosa.feature.zero_crossing_rate(y=y)) # Zero-crossing-rate mean
    
    # Time-based
    tempogramSum = np.sum(librosa.feature.tempogram(y=y)) # intensity and relevance of onsets (like in a beat)
    
    featureVector = [rmsStd, spectralContrastMean, spectralCentroidMean, zcrStd, zcrMean, tempogramSum]
    
    return featureVector
    

In [45]:
#getFeatureVectorOfAudioFile("../videos/processed/01_DevilsAdvocate_02.wav")

[0.033521697,
 19.360460111539354,
 989.2678646471101,
 0.04443188250371633,
 0.038132782277562866,
 205524.2703638143]

In [46]:
def processAndDisplayAudioDirectoryFeatures(dir: str, plot: bool = True, showDataframes: bool = True):
    dataFrame = pd.DataFrame(columns=['File', 'RMS Std', 'Spectral Contrast Mean', 'Spectral Centroid Mean', 'ZeroCrossingR Std', 'ZeroCrossingR Mean', 'Tempogram Sum'])
    
    if os.path.isdir(dir):
        for filename in os.listdir(dir):
            filepath = os.path.join(dir, filename)
            name, extension = os.path.splitext(filename)
            if os.path.isfile(filepath) and extension.upper() in ['.WAV']: #just take these extensions
                featureVector = getFeatureVectorOfAudioFile(filepath)
                newRow = pd.DataFrame.from_records([{
                    'File': name,
                    'RMS Std': featureVector[0],
                    'Spectral Contrast Mean': featureVector[1],
                    'Spectral Centroid Mean': featureVector[2],
                    'ZeroCrossingR Std': featureVector[3],
                    'ZeroCrossingR Mean': featureVector[4],
                    'Tempogram Sum': featureVector[5]
                }])
                dataFrame = pd.concat([dataFrame, newRow])
    
    return dataFrame

In [47]:
processAndDisplayAudioDirectoryFeatures(dir="../videos/processed", plot=True, showDataframes=True)

Unnamed: 0,File,RMS Std,Spectral Contrast Mean,Spectral Centroid Mean,ZeroCrossingR Std,ZeroCrossingR Mean,Tempogram Sum
0,02_HowSheMove_02,0.080128,18.875308,1948.826909,0.054895,0.092221,154561.973703
0,01_PulpFiction_04,0.047167,20.095615,1816.60855,0.035078,0.092653,185171.851674
0,01_PulpFiction_05,0.028004,19.174169,1774.12485,0.061951,0.091759,213211.061136
0,02_HowSheMove_03,0.04992,19.078835,1687.156611,0.038467,0.077914,198668.396928
0,03_ArcticTale_02,0.043664,21.209368,1326.980918,0.052909,0.065794,223684.215891
0,03_ArcticTale_03,0.049232,20.412388,1811.171907,0.05294,0.071703,189680.078656
0,02_HowSheMove_04,0.048772,17.339168,2044.972998,0.053381,0.071234,133592.024926
0,01_PulpFiction_02,0.048424,19.201557,1896.805835,0.059238,0.106563,247691.674776
0,01_PulpFiction_03,0.04602,18.943706,1932.157688,0.055644,0.105732,214849.963044
0,02_HowSheMove_05,0.04726,17.802465,1720.045289,0.062225,0.074777,163364.17452
