In [9]:
import numpy as np                     # numpy
import pandas as pd                    # pandas
import librosa
import librosa.display

import matplotlib.pyplot as plt

import os

In [10]:
def getFeatureVectorOfAudioFile(filepath) -> list:
    """Compute a six-element feature vector for a single audio file.

    The file is loaded at its native sampling rate (``sr=None``) and that rate
    is forwarded to every feature extractor that accepts an ``sr`` argument.
    Without it those extractors fall back to their own default rate, which
    only matches files that happen to be stored at that rate.

    Parameters
    ----------
    filepath
        Path of the audio file; handed unchanged to ``librosa.load``.

    Returns
    -------
    list
        ``[rmsStd, spectralContrastMean, spectralCentroidMean,
        zcrStd, zcrMean, tempogramSum]`` in exactly this order.
    """
    y, sr = librosa.load(filepath, sr=None)

    # Spectral
    # how much variation in volume/loudness
    rmsStd = np.std(librosa.feature.rms(y=y))
    # mean of how much difference is in the spectrum
    # NOTE(review): with the real sampling rate, spectral_contrast may reject
    # files with a very low rate (default bands vs. Nyquist) — confirm.
    spectralContrastMean = np.mean(librosa.feature.spectral_contrast(y=y, sr=sr))
    # where in the spectrum the signal is concentrated
    spectralCentroidMean = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))

    # Time-based
    # The zero-crossing rate is computed once and reused for both statistics.
    zeroCrossingRate = librosa.feature.zero_crossing_rate(y=y)
    zcrStd = np.std(zeroCrossingRate)
    zcrMean = np.mean(zeroCrossingRate)
    # intensity and relevance of onsets (like in a beat)
    tempogramSum = np.sum(librosa.feature.tempogram(y=y, sr=sr))

    return [rmsStd, spectralContrastMean, spectralCentroidMean, zcrStd, zcrMean, tempogramSum]
    

In [11]:
def createFeatureMatrixForDirectory(dir: str, extensions: tuple = ('.wav',)) -> pd.DataFrame:
    """Return a feature matrix for a directory of tracks.

    Every regular file directly inside ``dir`` whose extension matches one of
    ``extensions`` (compared case-insensitively) is passed to
    ``getFeatureVectorOfAudioFile``; the file name without its extension and
    the six returned features form one row.

    All rows are collected first and the frame is built once. This avoids the
    repeated ``pd.concat`` onto an empty frame, which copied the whole frame
    per file and left every row with index label 0.

    Parameters
    ----------
    dir
        Directory to scan (not recursive). If it is not an existing
        directory, an empty frame with the expected columns is returned.
    extensions
        File extensions, including the leading dot, that are accepted.

    Returns
    -------
    pd.DataFrame
        Columns ``File`` plus the six feature columns, sorted by ``File``
        with a fresh ``0..n-1`` index.
    """
    columns = ['File', 'RMS Std', 'Spectral Contrast Mean', 'Spectral Centroid Mean',
               'ZeroCrossingR Std', 'ZeroCrossingR Mean', 'Tempogram Sum']
    acceptedExtensions = {ext.upper() for ext in extensions}

    records = []
    if os.path.isdir(dir):
        for filename in os.listdir(dir):
            filepath = os.path.join(dir, filename)
            name, extension = os.path.splitext(filename)
            # Cheap string check first; isfile() also skips directories that
            # happen to carry an audio extension.
            if extension.upper() in acceptedExtensions and os.path.isfile(filepath):
                featureVector = getFeatureVectorOfAudioFile(filepath)
                records.append(dict(zip(columns, [name, *featureVector])))

    dataFrame = pd.DataFrame(records, columns=columns)
    # os.listdir() gives no ordering guarantee, so sort explicitly.
    return dataFrame.sort_values(by='File', ignore_index=True)

In [14]:
def featurePerGenre(inputFrame: pd.DataFrame) -> tuple:
    """Summarise every feature per genre and return the per-genre subsets.

    Rows are assigned to a genre by the prefix of their ``File`` value:
    ``01_`` is Thriller, ``02_`` is Dance, ``03_`` is Documentary. For each
    genre and each feature column, the standard deviation and the mean over
    that genre's rows are computed with ``np.std`` / ``np.mean``.

    Parameters
    ----------
    inputFrame
        Frame with a string ``File`` column and the six feature columns
        ``RMS Std``, ``Spectral Contrast Mean``, ``Spectral Centroid Mean``,
        ``ZeroCrossingR Std``, ``ZeroCrossingR Mean`` and ``Tempogram Sum``.

    Returns
    -------
    tuple
        ``(summaryFrame, thrillerFrame, danceFrame, documentaryFrame)``.
        ``summaryFrame`` has one row per genre (index ``0..2``) with the
        columns ``Genre`` and ``'<feature> STD'`` / ``'<feature> MEAN'`` for
        every feature. The other three are the filtered rows of
        ``inputFrame``.
    """
    genrePrefixes = (('Thriller', '01_'), ('Dance', '02_'), ('Documentary', '03_'))
    featureColumns = ('RMS Std', 'Spectral Contrast Mean', 'Spectral Centroid Mean',
                      'ZeroCrossingR Std', 'ZeroCrossingR Mean', 'Tempogram Sum')

    genreFrames = []
    summaryRows = []
    for genre, prefix in genrePrefixes:
        genreFrame = inputFrame[inputFrame['File'].str.startswith(prefix)]
        genreFrames.append(genreFrame)

        row = {'Genre': genre}
        for feature in featureColumns:
            row[feature + ' STD'] = np.std(genreFrame[feature])
            row[feature + ' MEAN'] = np.mean(genreFrame[feature])
        summaryRows.append(row)

    # Fix the column order explicitly: Genre, then STD/MEAN pairs per feature.
    summaryColumns = ['Genre'] + [feature + suffix
                                  for feature in featureColumns
                                  for suffix in (' STD', ' MEAN')]
    dataFrame = pd.DataFrame(summaryRows, columns=summaryColumns)

    thrillerFrame, danceFrame, documentaryFrame = genreFrames
    return dataFrame, thrillerFrame, danceFrame, documentaryFrame
    

In [17]:
def processAndDisplayAudioDirectoryFeatures(dir: str, plot: bool = True, showDataframes: bool = True, normalize: bool = False):
    """Extract features for a directory, then optionally show tables and plots.

    Parameters
    ----------
    dir
        Directory passed to ``createFeatureMatrixForDirectory``.
    plot
        When true, draw one line plot per feature with one line per genre.
    showDataframes
        When true, ``display`` the per-file feature matrix and the per-genre
        summary frame.
    normalize
        When true, min-max scale every feature column to ``[0, 1]`` before
        the per-genre summary is computed. A column whose values are all
        identical is mapped to ``0`` instead of dividing by zero.

    Returns
    -------
    None
        The function is called for its display side effects only.
    """
    featureMatrix = createFeatureMatrixForDirectory(dir)

    if normalize:
        for column in featureMatrix.columns:
            if column == 'File':
                continue
            lowest = featureMatrix[column].min()
            valueRange = featureMatrix[column].max() - lowest
            if valueRange == 0:
                # Constant column: only shift, there is nothing to scale.
                featureMatrix[column] = featureMatrix[column] - lowest
            else:
                featureMatrix[column] = (featureMatrix[column] - lowest) / valueRange

    genreFrame, thrillerFrame, danceFrame, documentaryFrame = featurePerGenre(featureMatrix)

    # The two flags were previously wired the wrong way round: `plot` showed
    # the tables and `showDataframes` drew the figures.
    if showDataframes:
        # `display` is not imported in this file; it is expected to be
        # provided by the notebook kernel.
        display(featureMatrix)
        display(genreFrame)

    if plot:
        framesByGenre = (
            ('Thriller', thrillerFrame),
            ('Dance', danceFrame),
            ('Documentary', documentaryFrame),
        )
        # (figure title, feature column) — the column name doubles as y label.
        figures = (
            ('Variation in volume (dynamics)', 'RMS Std'),
            ('Variation in the frequency spectrum', 'Spectral Contrast Mean'),
            ('Most prominent frequency', 'Spectral Centroid Mean'),
            ('Variation of Zero-Crossing-Rate', 'ZeroCrossingR Std'),
            ('Mean of Zero-Crossing-Rate', 'ZeroCrossingR Mean'),
            ('Amount and Intensity of Onsets', 'Tempogram Sum'),
        )
        for title, column in figures:
            fig, ax = plt.subplots()
            for label, frame in framesByGenre:
                # x is the position of the file within its own genre
                ax.plot(np.arange(0, len(frame)), frame[column], label=label)
            ax.set_title(title)
            ax.set_xlabel('Files')
            ax.set_ylabel(column)
            ax.legend(loc="upper right")
            plt.show()
    
    