In [2]:
import IPython, nussl, numpy as np, scipy as sp, matplotlib.pyplot as plt, matplotlib, sklearn, librosa, cmath,math,time,vamp
import os,csv
from IPython.display import Audio, display
from scipy.fftpack import fft, ifft
 
# This line makes sure your plots happen IN the webpage you're building, instead of in separate windows.
%matplotlib inline

The following functions are used to calculate a 9-dimensional vector corresponding to the musical surface features outlined in section 3.1 of Tzanetakis's Automatic Musical Genre Classification Of Audio Signals. The paper can be found by following the link below:
http://www.cs.northwestern.edu/~pardo/courses/eecs352/papers/genre%20classification%20low%20level%20-%20tzanetakis.pdf

In [3]:
def meanAndStandardDeviationCentroid(signal, sample_rate):
    """
    Summarise the spectral centroid of an audio signal.

    Runs the 'vamp-libxtract:spectral_centroid' Vamp plugin on ``signal``
    and reduces the values it produces to two statistics.

    signal: audio samples, handed unchanged to ``vamp.collect``
    sample_rate: sample rate of ``signal``, handed unchanged to ``vamp.collect``

    returns: (mean, standard deviation) of the plugin's values
    """
    plugin_output = vamp.collect(signal, sample_rate, 'vamp-libxtract:spectral_centroid')
    # Only element [1] of the 'vector' entry is used; element [0] is ignored
    # (presumably the frame step -- confirm against the vamp host docs).
    centroid_values = plugin_output['vector'][1]
    return np.mean(centroid_values), np.std(centroid_values)

def meanAndStandardDeviationRolloff(signal, sample_rate):
    """
    Summarise the spectral rolloff of an audio signal.

    Runs the 'vamp-libxtract:rolloff' Vamp plugin on ``signal`` and reduces
    the values it produces to two statistics.

    signal: audio samples, handed unchanged to ``vamp.collect``
    sample_rate: sample rate of ``signal``, handed unchanged to ``vamp.collect``

    returns: (mean, standard deviation) of the plugin's values
    """
    collected = vamp.collect(signal, sample_rate, 'vamp-libxtract:rolloff')
    # Element [1] of the 'vector' entry holds the values being summarised.
    rolloff_values = collected['vector'][1]

    stats = (np.mean(rolloff_values), np.std(rolloff_values))
    return stats
    
def zeroCrossings(signal,sample_rate):
    """
    Summarise the zero-crossing values of an audio signal.

    Runs the 'vamp-libxtract:zcr' Vamp plugin on ``signal`` and reduces the
    values it produces to two statistics.

    signal: audio samples, handed unchanged to ``vamp.collect``
    sample_rate: sample rate of ``signal``, handed unchanged to ``vamp.collect``

    returns: (mean, standard deviation) of the plugin's values
    """
    # Element [1] of the 'vector' entry holds the values being summarised.
    zcr_values = vamp.collect(signal, sample_rate, 'vamp-libxtract:zcr')['vector'][1]

    zcr_mean = np.mean(zcr_values)
    zcr_std = np.std(zcr_values)
    return zcr_mean, zcr_std

def flux(signal,sample_rate):
    """
    Summarise the spectral flux of an audio signal.

    Runs the 'bbc-vamp-plugins:bbc-spectral-flux' Vamp plugin on ``signal``
    and reduces the values it produces to two statistics.

    signal: audio samples, handed unchanged to ``vamp.collect``
    sample_rate: sample rate of ``signal``, handed unchanged to ``vamp.collect``

    returns: (mean, standard deviation) of the plugin's values
    """
    result = vamp.collect(signal, sample_rate, 'bbc-vamp-plugins:bbc-spectral-flux')
    # Element [1] of the 'vector' entry holds the values being summarised.
    flux_values = result['vector'][1]

    return np.mean(flux_values), np.std(flux_values)

def low_energy(signal,sample_rate):
    """
    Computes the low-energy value for a signal: the fraction of values
    produced by the 'bbc-vamp-plugins:bbc-energy' Vamp plugin that lie
    strictly below the mean of those values.

    signal: audio samples, handed unchanged to ``vamp.collect``
    sample_rate: sample rate of ``signal``, handed unchanged to ``vamp.collect``

    returns: a Python float between 0.0 and 1.0

    raises: ZeroDivisionError if the plugin produced no values
    """
    # Element [1] of the 'vector' entry holds the energy values
    # (assumed one-dimensional -- confirm against the plugin's output).
    energy = np.asarray(vamp.collect(signal,sample_rate,'bbc-vamp-plugins:bbc-energy')['vector'][1])

    # Strict '<': values equal to the mean do not count as low energy.
    below_mean = np.count_nonzero(energy < np.mean(energy))
    return float(below_mean) / float(len(energy))

def surface_features(signal, sample_rate):
    """
    Builds the 9-dimensional musical surface feature vector described in
    Tzanetakis's Automatic Musical Genre Classification Of Audio Signals.

    signal: audio samples, forwarded to each feature function
    sample_rate: sample rate of ``signal``, forwarded to each feature function

    returns: numpy array laid out as
        [centroid mean, centroid std, rolloff mean, rolloff std,
         zero-crossing mean, zero-crossing std, flux mean, flux std,
         low energy]
    """
    centroid_stats = meanAndStandardDeviationCentroid(signal, sample_rate)
    rolloff_stats = meanAndStandardDeviationRolloff(signal, sample_rate)
    zcr_stats = zeroCrossings(signal, sample_rate)
    flux_stats = flux(signal, sample_rate)
    low_energy_ratio = low_energy(signal, sample_rate)

    # Each *_stats pair is (mean, std); flatten them in the documented order.
    values = []
    for pair in (centroid_stats, rolloff_stats, zcr_stats, flux_stats):
        pair_mean, pair_std = pair
        values.append(pair_mean)
        values.append(pair_std)
    values.append(low_energy_ratio)
    return np.array(values)

The following functions are used to calculate an 11-dimensional vector (the five strongest tempogram bins, their mean strengths, and one overall tempo estimate) based on the rhythmic features outlined in section 3.2 of Tzanetakis's Automatic Musical Genre Classification Of Audio Signals.

In [4]:
def rhythm_features(signal,sample_rate):
    """
    Builds the rhythm feature vector for an audio signal.

    signal: audio samples, forwarded to librosa and ``tempogram_analysis``
    sample_rate: sample rate of ``signal``

    returns: numpy array laid out as
        [the top-five tempogram bins, their five mean strengths,
         the overall tempo estimate]
    """
    onset_envelope = librosa.onset.onset_strength(y=signal, sr=sample_rate)
    overall_tempo = estimate_tempo(onset_envelope, sample_rate)
    peak_bins, peak_strengths = tempogram_analysis(signal, sample_rate)
    # Bins first, then strengths, then the single tempo estimate at the end.
    return np.append(np.append(peak_bins, peak_strengths), overall_tempo)

def estimate_tempo(oenv,sample_rate):
    """
    Thin wrapper around ``librosa.beat.estimate_tempo``.

    oenv: onset strength envelope (``rhythm_features`` passes the output of
        ``librosa.onset.onset_strength``)
    sample_rate: forwarded to librosa as ``sr``

    returns: whatever ``librosa.beat.estimate_tempo`` returns
    """
    tempo = librosa.beat.estimate_tempo(oenv, sr=sample_rate)
    return tempo

def tempogram_analysis(signal, sample_rate):
    """
    Finds the five tempogram bins with the highest time-averaged strength.

    The first tempogram row (the "bpm of zero" bin) is dropped before
    ranking. The results are ordered strongest first, with ties broken by
    the lower bin, so each position of the output means the same thing for
    every file. A plain ``np.argpartition`` leaves the order of the top
    five unspecified, which makes the feature columns inconsistent.

    signal: audio samples, passed to ``librosa.feature.tempogram`` as ``y``
    sample_rate: sample rate of ``signal``, passed as ``sr``

    returns: (top_five_tempos, top_five_tempo_values)
        top_five_tempos: bin indices into the tempogram *after* its first
            row was removed (add 1 to get the tempogram row)
        top_five_tempo_values: mean strength of each of those bins

    raises: ValueError if fewer than five bins remain after dropping row 0
    """
    tempogram = librosa.feature.tempogram(y=signal, sr = sample_rate)
    #take out bpm of zero
    means_over_time = np.mean(tempogram[1:], axis = 1)
    if means_over_time.shape[0] < 5:
        raise ValueError("tempogram has fewer than five non-zero bins")
    # A stable sort on the negated means ranks strongest first and makes
    # ties deterministic.
    top_five_tempos = np.argsort(-means_over_time, kind='mergesort')[:5]
    top_five_tempo_values = means_over_time[top_five_tempos]
    return top_five_tempos, top_five_tempo_values

In [5]:
## given an audio file, computes a feature vector 
def make_feature_vector(file_path, sample_rate):
    """
    Loads one audio file and returns its labelled feature vector.

    file_path: '/'-separated path to the audio file; the part of the file
        name before the first '.' is used as the genre label
    sample_rate: rate the file is loaded at (passed to ``librosa.load``)

    returns: numpy array of the surface features, then the rhythm features,
        then the genre label as the last element
    """
    audio, loaded_rate = librosa.load(path=file_path, sr=sample_rate)
    surface = surface_features(audio, loaded_rate)
    rhythm = rhythm_features(audio, loaded_rate)

    # e.g. 'datasets/genres/blues/blues.00000.au' -> 'blues'
    base_name = file_path.split('/')[-1]
    label = np.array([base_name.split('.')[0]])

    return np.concatenate((surface, rhythm, label))
    

In [28]:
def create_datasets(sample_rate):
    """
    Writes one feature CSV per dataset variant into the 'features' directory.

    The variants are the unseparated control set ('datasets/genres'), the
    percussive and harmonic sets ('datasets/genres-hpss') and the foreground
    and background sets ('datasets/genres-repet'). Genre names are the
    sub-directories of 'datasets/genres', taken in sorted order so the rows
    come out in the same order on every run.

    sample_rate: rate every audio file is loaded at

    Output files: features/control.csv, features/hpss_percussive.csv,
    features/hpss_harmonic.csv, features/repet_foreground.csv and
    features/repet_background.csv.
    """
    dirs = ['datasets/genres','datasets/genres-hpss','datasets/genres-repet']
    # One list of separation types per entry of `dirs`. The comma after
    # ['control'] is essential: without it Python indexes the list with a
    # tuple and raises TypeError.
    types = [['control'],['percussive','harmonic'],['foreground','background']]
    genres = sorted(d for d in os.listdir('datasets/genres') if os.path.isdir(os.path.join('datasets/genres', d)))
    if not os.path.exists('features'):
        os.makedirs('features')
    for directory, separated_types in zip(dirs, types):
        for separated_type in separated_types:
            if separated_type == "control":
                csv_name = 'features/control.csv'
            else:
                # 'datasets/genres-hpss' + 'harmonic' -> 'features/hpss_harmonic.csv'
                csv_name = 'features/' + directory.split('-')[1] + '_' + separated_type + '.csv'
            create_dataset(sample_rate, directory, csv_name,genres, separated_type)
    return
                    
def create_dataset(sample_rate, directory, csv_name, genres, separated_type = "control", num_files = 100):
    """
    Extracts a feature vector from every audio file of the given genres and
    writes them, one row per file, to a CSV file.

    sample_rate: rate every audio file is loaded at
    directory: dataset root containing one sub-directory per genre
    csv_name: path of the CSV file to write; its parent directory is
        created if it does not exist yet
    genres: genre names (sub-directory names and file-name prefixes)
    separated_type: "control" for files named '<genre>.<nnnnn>.au',
        otherwise files are named '<genre>.<separated_type>.<nnnnn>.au'
    num_files: number of files per genre, numbered from 0

    Files are looked up at '<directory>/<genre>/<file name>', where
    <nnnnn> is the file number zero-padded to five digits.
    """
    import sys  # function scope: only needed to choose the csv file mode

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("Creating Dataset from directory: " + directory + " of type: " + separated_type)
    features = []
    for genre in genres:
        for n in range(num_files):
            # Always five digits; the old '.000' + two-digit form produced
            # six digits once n reached 100.
            num = '%05d' % n
            if separated_type == "control":
                file_name = genre + '.' + num + '.au'
            else:
                file_name = genre + '.' + separated_type + '.' + num + '.au'
            path = directory + '/' + genre + '/' + file_name
            features.append(make_feature_vector(path, sample_rate))

    # Calling this function directly (not via create_datasets) must not fail
    # just because the output directory is missing.
    out_dir = os.path.dirname(csv_name)
    if out_dir and not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    # The csv module wants a binary file on Python 2 and a text file opened
    # with newline='' on Python 3.
    if sys.version_info[0] < 3:
        csvfile = open(csv_name, 'wb')
    else:
        csvfile = open(csv_name, 'w', newline='')
    with csvfile:
        csv.writer(csvfile).writerows(features)
    return

Run the following code to create a csv file of extracted audio features

In [31]:
# Small smoke run: two files from each of three genres of the control set.
sr = 11025
create_dataset(
    sample_rate=sr,
    directory="datasets/genres",
    csv_name="features/test.csv",
    genres=["blues", "metal", "rock"],
    num_files=2
)

Creating Dataset from directory: datasets/genres of type: control


TypeError: Failed to load plugin: vamp-libxtract:spectral_centroid

Testing stuff below:

In [7]:
# Variables used in testing.
# NOTE(review): n_fft and hop_length are not referenced in the part of the
# notebook visible here -- confirm they are used further down.

n_fft = 2048
hop_length = 512
sr = 11025

# Test signals: blues clips 00000 and 00001, loaded at `sr` Hz from the
# original dataset (signal*), the 'genres-repet' directory (background*,
# foreground*) and the 'genres-hpss' directory (harmonic*, percussive*).
# Every call rebinds `sr` to the sample rate that librosa.load returns.
signal0, sr = librosa.load(path='datasets/genres/blues/blues.00000.au', sr=sr)
signal1, sr = librosa.load(path='datasets/genres/blues/blues.00001.au', sr=sr)
background0, sr = librosa.load(path='datasets/genres-repet/blues/blues.background.00000.au', sr=sr)
background1, sr = librosa.load(path='datasets/genres-repet/blues/blues.background.00001.au', sr=sr)
foreground0, sr = librosa.load(path='datasets/genres-repet/blues/blues.foreground.00000.au', sr=sr)
foreground1, sr = librosa.load(path='datasets/genres-repet/blues/blues.foreground.00001.au', sr=sr)
harmonic0, sr = librosa.load(path='datasets/genres-hpss/blues/blues.harmonic.00000.au', sr=sr)
harmonic1, sr = librosa.load(path='datasets/genres-hpss/blues/blues.harmonic.00001.au', sr=sr)
percussive0, sr = librosa.load(path='datasets/genres-hpss/blues/blues.percussive.00000.au', sr=sr)
percussive1, sr = librosa.load(path='datasets/genres-hpss/blues/blues.percussive.00001.au', sr=sr)