In [None]:
import librosa
import librosa.display
import matplotlib.pyplot as plt
import IPython.display as ipd
import numpy as np
import pandas as pd

In [169]:
def AudioFeatures(y,sr,class_label,n_mfcc=20):
    """Summarise an audio signal as a flat dictionary of mean feature values.

    Parameters
    ----------
    y : audio samples, handed unchanged to the librosa feature extractors.
    sr : sampling rate of ``y``.
    class_label : value stored unchanged under the ``'class'`` key.
    n_mfcc : number of MFCCs requested from librosa; one ``mfcc<i>`` entry is
        stored per coefficient. The default of 20 yields ``mfcc0`` .. ``mfcc19``.

    Returns
    -------
    dict
        Keys, in insertion order: ``chroma_stft``, ``spec_cent``, ``spec_bw``,
        ``rolloff``, ``zcr``, ``mfcc0`` .. ``mfcc<n_mfcc-1>``, ``class``.
    """
    # Dictionary in which to store features
    feature_dict = {}

    # np.mean without an axis collapses each librosa result to a single scalar
    feature_dict['chroma_stft'] = np.mean(librosa.feature.chroma_stft(y=y, sr=sr))
    feature_dict['spec_cent'] = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))
    feature_dict['spec_bw'] = np.mean(librosa.feature.spectral_bandwidth(y=y, sr=sr))
    feature_dict['rolloff'] = np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr))
    feature_dict['zcr'] = np.mean(librosa.feature.zero_crossing_rate(y))

    # n_mfcc is passed explicitly so the number of mfcc<i> entries is decided
    # here rather than by whatever librosa's default happens to be
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)

    # One mean per row of the MFCC array; iterating the rows themselves keeps
    # the key count in step with what librosa actually returned
    for i, coefficient in enumerate(mfcc):
        feature_dict['mfcc'+str(i)] = np.mean(coefficient)

    # Class label
    feature_dict['class'] = class_label

    return feature_dict

def WindowExtract(y,sr,class_label,window_length_sec = 10):
    """Split an audio signal into consecutive windows and extract features from each.

    Parameters
    ----------
    y : audio samples; must support ``len`` and slicing.
    sr : sampling rate of ``y`` (samples per second).
    class_label : label forwarded to ``AudioFeatures`` for every window.
    window_length_sec : window length in seconds (int or float).

    Returns
    -------
    list of dict
        One ``AudioFeatures`` result per window, in chronological order. The
        last window holds whatever samples remain and may be shorter than the
        others. An empty signal gives an empty list.

    Raises
    ------
    ValueError
        If the window would span less than one sample.
    """
    # Window size in whole samples: slice bounds must be integers, which a
    # float window length (e.g. 2.5 s) would otherwise violate
    samples_per_window = int(round(sr*window_length_sec))
    if samples_per_window <= 0:
        raise ValueError('window_length_sec must cover at least one sample')

    # List to store dictionaries of audio features
    audio_features_list = []

    # Stepping through start offsets below len(y) guarantees every window has
    # at least one sample, so no empty slice is ever sent to AudioFeatures
    # (this used to happen when the length was an exact multiple of the window)
    for window_start in range(0, len(y), samples_per_window):
        window = y[window_start:window_start+samples_per_window]
        audio_features_list.append(AudioFeatures(window,sr,class_label))

    return audio_features_list

def ProcessAudioFiles(mp3_classes_input,window_length_sec = 10):
    """Load every listed audio file and collect the windowed features of all of them.

    Parameters
    ----------
    mp3_classes_input : dict mapping an audio file path to its class label.
    window_length_sec : window length in seconds, forwarded to ``WindowExtract``.

    Returns
    -------
    list of dict
        The per-window feature dictionaries of all files, concatenated in the
        iteration order of ``mp3_classes_input``. Empty when no files are given.

    Prints a progress line per file and a final 'Done processing' line.
    """
    all_audio_features = []

    # Loops over all files
    for filepath,class_label in mp3_classes_input.items():
        print('Processing:',filepath)
        y , sr = librosa.load(filepath,mono=True)
        # extend() grows the list in place instead of re-copying everything
        # accumulated so far for each additional file
        all_audio_features.extend(WindowExtract(y,sr,class_label,window_length_sec))

    print('Done processing')

    return all_audio_features

## Processing individual audio files

In [95]:
# Recordings examined one at a time in this section
cello_audio_file = 'data/cello/bach_cello_prelude.mp3'
violin_audio_file = 'data/violin/bach_violin_partita.mp3'

# Decode both files as mono signals. sr is reassigned by each call, so it ends
# up holding the rate returned for the violin file.
y_cello, sr = librosa.load(cello_audio_file, mono=True)
y_violin, sr = librosa.load(violin_audio_file, mono=True)

# Further recordings, currently disabled:
#piano_audio_file = 'data/piano/bach_piano.mp3'
#y_piano , sr = librosa.load(piano_audio_file,mono=True)

#orch_audio_file = 'data/orch/bach_brandenburg.mp3'
#y_orch , sr = librosa.load(orch_audio_file,mono=True)

#sonata_audio_file = 'data/violin/beethoven_violin_sonata.mp3'
#y_sonata , sr = librosa.load(sonata_audio_file,mono=True)



In [111]:
# Cut each loaded recording into 10-second windows; each result is a list with
# one feature dictionary per window
cello_features = WindowExtract(y_cello, sr, 'cello', window_length_sec=10)
violin_features = WindowExtract(y_violin, sr, 'violin', window_length_sec=10)

# Disabled together with the corresponding loads:
#piano_features = WindowExtract(y_piano,sr,'piano',window_length_sec=10)
#orch_features = WindowExtract(y_orch,sr,'orch',window_length_sec=10)
#sonata_features = WindowExtract(y_sonata,sr,'sonata',window_length_sec=10)

## Processing the entire directory of audio files

In [172]:
# Fetches file paths from folder
import glob
import os
import re

training_file_paths = glob.glob("data/*/*.mp3") # .mp3 files inside folder
folder_paths = glob.glob("data/*/") # Names of folders


def class_label_from_path(filepath):
    """Return the name of the folder that directly contains ``filepath``.

    The folder name doubles as the class label, e.g.
    'data/orch/bach_brandenburg.mp3' -> 'orch'. os.path is used rather than a
    regex on '/' so the lookup also works when glob returns paths with the
    platform's own separator.
    """
    return os.path.basename(os.path.dirname(filepath))


# Creates dictionary with mp3 filepaths and class
mp3_classes = {filepath: class_label_from_path(filepath) for filepath in training_file_paths}

mp3_classes

{'data/orch/bach_brandenburg.mp3': 'orch',
 'data/violin/bach_violin_partita.mp3': 'violin',
 'data/cello/bach_cello_prelude.mp3': 'cello',
 'data/piano/bach_piano.mp3': 'piano'}

In [173]:
# Run the full pipeline (load -> window -> features) over every file in the
# mapping, using 10-second windows
all_audio_features = ProcessAudioFiles(mp3_classes, window_length_sec=10)

Processing: data/orch/bach_brandenburg.mp3




Processing: data/violin/bach_violin_partita.mp3




Processing: data/cello/bach_cello_prelude.mp3




Processing: data/piano/bach_piano.mp3




Done processing




In [174]:
# Tabulate the results: one row per feature dictionary, one column per key
audio_features_df = pd.DataFrame(data=all_audio_features)
audio_features_df

Unnamed: 0,chroma_stft,spec_cent,spec_bw,rolloff,zcr,mfcc0,mfcc1,mfcc2,mfcc3,mfcc4,...,mfcc11,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,class
0,0.264441,1765.298209,1847.487595,3385.159422,0.096877,-142.832611,117.925354,-29.570614,24.236523,-8.709535,...,4.889582,-3.782030,-4.716604,-8.348982,3.483846,-0.512892,1.370773,-4.181433,6.331887,orch
1,0.275000,1710.788541,1793.816858,3256.659658,0.094020,-177.162781,119.355293,-32.014610,23.327984,-7.843004,...,3.993509,-0.559095,4.113492,-1.478885,4.790418,-3.001189,-1.917148,-7.850810,0.337552,orch
2,0.261659,1720.081568,1751.512086,3232.328637,0.098791,-151.796692,122.403313,-38.164970,22.618837,-11.966754,...,-2.072212,-7.403428,-0.144369,-3.724543,4.346575,1.223852,2.222302,-8.712396,-3.578072,orch
3,0.274503,1606.622975,1666.549069,2995.413442,0.089610,-137.061188,131.126892,-36.884163,18.559828,-14.109509,...,-4.738740,-5.126620,2.212852,-1.700091,6.183526,1.274271,2.687211,-6.837768,0.207408,orch
4,0.263424,1770.654064,1802.390554,3260.156930,0.102727,-135.114471,119.442261,-30.716518,15.171096,-16.624706,...,1.106799,-2.966545,-0.027783,-3.154492,9.085186,2.232239,0.185392,-5.831506,6.190441,orch
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
138,0.253442,661.471822,673.154697,1111.158246,0.044555,-382.192444,231.027206,-10.599669,-0.648759,13.269133,...,-0.095024,-1.517338,-1.086360,-0.180927,-2.422057,-2.471755,-0.019865,0.575058,-1.768189,piano
139,0.225339,712.878986,688.142066,1114.855362,0.050897,-354.696228,212.869522,-5.067251,-8.552455,6.740084,...,-2.517924,-1.271465,-1.428250,-0.297679,0.036180,-3.371005,-2.652339,0.840888,-1.873575,piano
140,0.249630,632.052814,642.159749,1019.354857,0.042369,-385.620178,228.303635,-1.803966,-4.449956,12.736698,...,-0.062930,-1.343181,-0.682400,0.882803,0.255605,-1.100505,-0.293535,0.373888,0.038722,piano
141,0.265711,704.235384,784.238084,1183.102127,0.044077,-346.533356,201.028137,-12.628269,3.508238,11.951238,...,-1.644354,0.667186,-1.620208,-1.538307,-1.297938,-3.832280,-2.999320,-0.814642,-2.736331,piano


In [175]:
# Write the feature table to disk; index=False leaves the row index out of the file
audio_features_df.to_csv(path_or_buf='audio_features.csv', index=False)