In [None]:
from IPython.display import Audio
import aifc
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = (10.0, 8.0)
%matplotlib inline

In [None]:
import os
os.listdir('test_sounds')

In [None]:
!mkdir -p test_sounds

In [None]:
pure_notes = [
    'http://www.emstechlab.com/library/sound/Instruments%20Various/PIANO%20C0.mp3',
    'http://www.emstechlab.com/library/sound/Instruments%20Various/PIANO%20C1.mp3',
    'http://www.emstechlab.com/library/sound/Instruments%20Various/PIANO%20C2.mp3',
    'http://www.emstechlab.com/library/sound/Instruments%20Various/PIANO%20C3.mp3',
    'http://www.emstechlab.com/library/sound/Instruments%20Various/PIANO%20C4.mp3',
    'http://www.emstechlab.com/library/sound/Instruments%20Various/PIANO%20C5.mp3',
    'http://www.emstechlab.com/library/sound/Instruments%20Various/PIANO%20G1.mp3',
    'http://www.emstechlab.com/library/sound/Instruments%20Various/PIANO%20G2.mp3',
]
import re
pattern = re.compile(r'.*%20((.*).mp3)')
filename_url_map = {pattern.match(x).group(1) : 
                 pattern.match(x).group(0) for x in pure_notes}

for fname, url in filename_url_map.iteritems():
    !curl $url >> test_sounds/$fname

In [None]:
note_signal_map = {}
for note_name in os.listdir('test_sounds'):
    filepath = os.path.join('test_sounds', note_name)
    note_signal_map[note_name.replace('.mp3', '')] = librosa.load(filepath)
note_signal_map

In [None]:
for note_name, (signal, _) in note_signal_map.iteritems():
    pitches, magnitudes = librosa.core.piptrack(signal)

    f_max, t_max = np.unravel_index(np.argmax(magnitudes), magnitudes.shape)
    print 'target = {}'.format(note_name)
    print pitches[f_max, t_max]
    print librosa.core.hz_to_note(pitches[f_max, t_max])

In [None]:
import aubio
dir(aubio)

In [2]:
import sys, librosa
from aubio import source, onset


def get_onset_times(filepath, sr=22050, win_s = 512,
                   method='default'):
    """
    INPUT:
    filepath - path to audio file
    sr - sample rate
    win_s - fft window size
    method - see http://aubio.org/manpages/latest/aubionotes.1.html for methods offered
    
    OUTPUT:
    onset_times - array_type - times for when notes are believed to begin
    """
    hop_s = win_s / 2           # hop size

    filename = filepath
    samplerate = sr

    #set up the aubio system calls
    s = source(filename, samplerate, hop_s)
    samplerate = s.samplerate

    o = onset(method, win_s, hop_s, samplerate)

    # list of onsets, in samples
    onsets = []

    # total number of frames read
    total_frames = 0
    while True:
        samples, read = s()
        if o(samples):
            #print "%f" % o.get_last_s()
            onsets.append(o.get_last())
        total_frames += read
        if read < hop_s: break

    #convert the onset samples to the corresponding time of occurrence and return
    return librosa.core.samples_to_time(onsets, sr=samplerate)

def get_pitch_for_times(filepath, times, sr=22050,
                        downsample=1, win_s=4096, hop_s=512, threshold=0.8,
                       pitch_algorithm='yin',
                       pitch_unit='Hz'):
    """
    INPUT:
    filepath - string - path to audio file for which to extract pitch values
    times - array-type - times for which to extract pitch values (typically onset times)
    sr - sample rate - divided by downsample parameter
    downsample - downsample factor
    win_s - fft window size, is divided by downsample parameter
    hop_s - hop size, is divided by downsample parameter
    threshold - confidence threshold that must be exceed before assigning a pitch value
    pitch_algorithm - one of [default, schmitt, fcomb, mcomb, specacf, yin, yinfft]
    
    OUTPUT:
    pitch_values - pitch values for musical events beginning at each time in times
    
    NOTE: if no pitch is detected with confidence above 'threshold' between times[i] and times[i+1],
    a pitch value of 0 will be assigned
    """
    
    from aubio import source, pitch, freqtomidi

    filename = filepath
    samplerate = sr / downsample
    win_s = win_s / downsample # fft size
    hop_s = hop_s  / downsample # hop size
    tolerance = threshold
    
    target_samples = librosa.core.time_to_samples(times)
    
    s = source(filename, samplerate, hop_s)
    samplerate = s.samplerate

    pitch_o = pitch(pitch_algorithm, win_s, hop_s, samplerate)
    pitch_o.set_unit(pitch_unit)
    pitch_o.set_tolerance(tolerance)

    pitches = []
    confidences = []

    # total number of frames read
    total_frames = 0
    while True:
        samples, read = s()
        if read == 0: break
        pitch = pitch_o(samples)[0]
        confidence = pitch_o.get_confidence()
        print '{},{}'.format(pitch,confidence)
        pitches.append(pitch)
        confidences.append(confidence)
        total_frames += read
        if read < hop_s: break


    #the pitches that we will return for the passed in times
    target_pitches = []
    for _sample_index, _target_sample in enumerate(target_samples):
        #did we find a pitch we were confident about for this sample?
        _set_flag = False
        _working_sample = _target_sample #we want to find a confident pitch
        while (not _set_flag) and _working_sample < len(pitches) and\
            (_sample_index == len(target_samples) - 1 or\
             _working_sample < target_samples[_sample_index + 1]):
                if confidences[_working_sample] >= threshold:
                    _set_flag = True
                    target_pitches.append(pitches[_working_sample])
        
        if not _set_flag:
            target_pitches.append(0)
    return target_pitches


sound_path = 'test_sounds/C4.mp3'
get_pitch_for_times(sound_path, get_onset_times(sound_path))

0.0,0.0
0.0,0.0
0.0,0.0
0.0,0.0
530.650512695,0.581795752048
531.385986328,0.654402911663
531.40045166,0.6820525527
531.365234375,0.697379410267
531.585021973,0.797416090965
531.132995605,0.844552874565
530.458862305,0.874559402466
529.753845215,0.923275649548
528.931762695,0.955531597137
528.394775391,0.9719004035
528.19342041,0.980644762516
527.919677734,0.982390761375
527.84552002,0.981659054756
527.469299316,0.980647325516
527.582824707,0.980270922184
527.544250488,0.982501327991
527.54473877,0.985568642616
527.751647949,0.989246547222
527.614135742,0.989926338196
527.831359863,0.986980378628
527.670593262,0.98440104723
527.853637695,0.97636950016
528.006591797,0.969183564186
528.080444336,0.965604364872
528.229309082,0.969976961613
528.143737793,0.979882419109
528.190185547,0.9875177145
528.393249512,0.9883056283
528.471740723,0.986840724945
528.473937988,0.98606133461
528.498046875,0.986815571785
528.409179688,0.98792386055
528.444458008,0.990083754063
528.375427246,0.99183684587

KeyboardInterrupt: 

In [None]:
plt.plot(np.abs(librosa.core.stft(middle_c)))