In [1]:
import librosa
import numpy as np

In [2]:
# Get the local path of librosa's bundled 'nutcracker' example recording
# (the file is downloaded into librosa's cache when it is not already there).
filename = librosa.example('nutcracker')

Downloading file 'Kevin_MacLeod_-_P_I_Tchaikovsky_Dance_of_the_Sugar_Plum_Fairy.ogg' from 'https://librosa.org/data/audio/Kevin_MacLeod_-_P_I_Tchaikovsky_Dance_of_the_Sugar_Plum_Fairy.ogg' to 'C:\Users\PC\AppData\Local\librosa\librosa\Cache'.


In [3]:
# Decode the example file: `y` holds the audio waveform and `sr` its
# sampling rate, both as returned by librosa.load.

y, sr = librosa.load(filename)


In [5]:
# Run the default beat tracker: returns the estimated global tempo and the
# frame indices of the detected beat events.

tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)

# Newer librosa releases return `tempo` as an ndarray (one estimate per
# channel) rather than a Python float, and '{:.2f}'.format() raises TypeError
# on a 1-d array. Take the first (only, for mono audio) estimate as a scalar
# so the cell works on both old and new versions.
tempo_bpm = float(np.atleast_1d(tempo)[0])

print('Estimated tempo: {:.2f} beats per minute'.format(tempo_bpm))

Estimated tempo: 107.67 beats per minute


In [6]:
# Convert the frame indices of beat events into timestamps.
# No hop_length is passed here, so frames_to_time uses its default; this is
# only correct because beat_track above was also run with its default hop.

beat_times = librosa.frames_to_time(beat_frames, sr=sr)

In [7]:
# Display the beat timestamps computed in the previous cell.
beat_times

array([  1.18421769,   1.71827664,   2.32199546,   2.87927438,
         3.45977324,   4.01705215,   4.59755102,   5.13160998,
         5.7353288 ,   6.29260771,   6.84988662,   7.40716553,
         7.9876644 ,   8.54494331,   9.12544218,   9.65950113,
        10.21678005,  10.72761905,  11.28489796,  11.79573696,
        12.32979592,  12.86385488,  13.42113379,  13.95519274,
        14.4892517 ,  15.02331066,  15.55736961,  16.09142857,
        16.62548753,  17.15954649,  17.69360544,  18.25088435,
        18.80816327,  19.31900227,  19.87628118,  20.38712018,
        20.92117914,  21.4552381 ,  21.98929705,  22.52335601,
        23.05741497,  23.59147392,  24.12553288,  24.65959184,
        25.19365079,  25.72770975,  26.26176871,  26.81904762,
        27.35310658,  27.88716553,  28.44444444,  29.00172336,
        29.55900227,  30.11628118,  30.67356009,  31.20761905,
        31.78811791,  32.34539683,  32.85623583,  33.36707483,
        33.90113379,  34.43519274,  34.94603175,  35.45

In [9]:
import numpy as np 
import librosa 

# Load the example clip (librosa.ex is an alias of librosa.example).
y, sr = librosa.load(librosa.ex('nutcracker'))

# Set the hop length; at 22050 Hz, 512 samples ~= 23 ms
hop_length = 512

# Separate harmonics and percussives into two waveforms
# (HPSS: harmonic-percussive source separation)
y_harmonic, y_percussive = librosa.effects.hpss(y)

# Beat track on the percussive signal.
# hop_length is passed explicitly so the returned beat frame indices live on
# the same frame grid as the features computed with `hop_length` later on;
# otherwise changing hop_length above would silently misalign them.
tempo, beat_frames = librosa.beat.beat_track(y=y_percussive, sr=sr,
                                             hop_length=hop_length)




In [10]:
# Compute MFCC features from the raw signal
# (MFCC: Mel-frequency cepstral coefficients)
mfcc = librosa.feature.mfcc(y=y, sr=sr, hop_length=hop_length, n_mfcc=13)

# And the first-order differences (delta features)
mfcc_delta = librosa.feature.delta(mfcc)

# Stack MFCCs and deltas, then synchronize between beat events.
# No `aggregate` is given, so sync uses its default (the mean).
beat_mfcc_delta = librosa.util.sync(np.vstack([mfcc, mfcc_delta]), beat_frames)

# Compute chroma features from the harmonic signal.
# hop_length is passed explicitly so the chromagram shares the MFCC frame
# grid; both are later aggregated against the same beat_frames.
chromagram = librosa.feature.chroma_cqt(y=y_harmonic,
                                        sr=sr,
                                        hop_length=hop_length)

In [12]:
# Collapse the chromagram to one column per beat interval, taking the
# median of each chroma row between consecutive beat frames.
beat_chroma = librosa.util.sync(
    chromagram,
    beat_frames,
    aggregate=np.median,
)

# Combine everything into a single beat-synchronous feature matrix:
# chroma rows first, followed by the MFCC + delta rows.
beat_features = np.vstack((beat_chroma, beat_mfcc_delta))