In [19]:
import IPython
from IPython.display import Image
import random
import librosa
import string
import ffmpy
import os
import matplotlib.pyplot as plt
%matplotlib inline
from liveid import *
import datetime

ap = parameters()

def load_file(file_name, scale=1):
    x, sr = librosa.load(file_name)
    ap['sample_rate'] = sr*scale
    cqt, fp = fingerprint(x, ap)
    return x, sr, cqt, fp

def show_figure(fig_path, width):
    IPython.display.display(Image(fig_path, width = width))

def load_and_display(audio_path, title, aspect):
    audio_signal, sr, audio_cqt, audio_fp = load_file(audio_path)
    display_spectrogram_and_audio(audio_cqt, audio_fp, audio_signal, sr, title, aspect)
    
def display_spectrogram(stft, title, cmap, aspect):
    notes = ['C3', 'G3', 'C4', 'G4', 'C5', 'G5', 'C6', 'G6']
    numbers = np.array([0, 7, 13, 19, 25, 31, 37, 43])*2
    plt.figure(figsize=(15, 8))
    plt.imshow(stft, origin='lower', cmap=cmap, aspect = aspect)
    plt.title(title)
    plt.xticks(range(stft.shape[1])[::50], [str(datetime.timedelta(seconds=np.round(i / 10)))[3:] 
                                   for i in range(stft.shape[1])][::50], ha='left', rotation='vertical')
    plt.ylim([0, stft.shape[0]])
    plt.yticks(numbers, notes, va='bottom')
    plt.ylabel('Pitch')
    plt.xlabel('Time')
    plt.show()
    
def display_spectrogram_and_audio(cqt, fp, audio_signal, sr, title, aspect):
    display_spectrogram(cqt, title, 'PuRd', aspect)
    display_spectrogram(fp, title, 'Greys', aspect)
    audio(audio_signal, sr)

def audio(d, sr):
    tmp_file = 'tmp.mp3'
    file_path = 'tmp.wav'
    librosa.output.write_wav('tmp.wav', d, sr)
    ff = ffmpy.FFmpeg(
        inputs={file_path: None},
        outputs={tmp_file: None})
    ff.run()
    IPython.display.display(IPython.display.Audio(tmp_file))
    os.remove('tmp.mp3')
    os.remove('tmp.wav')
    
from IPython.core.display import HTML
HTML("""
<style>
audio {
    width: 100%;
}
.output_png {
    text-align: center;
}
</style>
""")

In [20]:
from scipy.ndimage.filters import gaussian_filter

fingerprint_parameters = {
    'window_seconds': 20,
    'hop_factor': .2
}

def fingerprint_fft(raw_fingerprint, audio_parameters, fingerprint_parameters):
    window_seconds = fingerprint_parameters['window_seconds']
    hop_factor = fingerprint_parameters['hop_factor']
    sample_rate = audio_parameters['sample_rate']
    window_length = window_seconds*audio_parameters['time_resolution']
    ffts = []
    start = 0
    while start <= raw_fingerprint.shape[1] - window_length:
        fft = np.fft.fft2(raw_fingerprint[:, start:start+window_length])
        fft = gaussian_filter(np.abs(np.fft.fftshift(fft)), sigma=.375)
        #fft = np.abs(np.fft.fftshift(fft))
        ffts.append(fft)
        start += int(window_length * hop_factor)   
    return ffts

In [25]:
#song_fp = load_file("../../coversong_identification/datasets/Elvis Presley - Can't Help Falling In Love/Can't Help Falling In Love-5V430M59Yn8.mp3")[-1]
#cover_fp = load_file("../../coversong_identification/datasets/Elvis Presley - Can't Help Falling In Love/Can't Help Falling in Love - Elvis Presley (Ukulele Cover by Stormy Amorette)-qlwnRhlh7ts.mp3")[-1]

song_fp = load_file("../data/test_data/Original.mp3")
cover_fp = load_file("../data/test_data/Cover.mp3")

In [26]:
song_ffts = fingerprint_fft(song_fp, ap, fingerprint_parameters)
cover_ffts = fingerprint_fft(cover_fp, ap, fingerprint_parameters)
plt.figure(figsize=(20, 4))
plt.imshow(np.log(np.hstack(song_ffts))**.5, aspect = 10)
plt.title("Can't Help Falling In Love With You - Original")
plt.show()

# plt.figure(figsize=(20, 4))
# plt.imshow(np.log(np.hstack(cover_ffts))**.5, aspect = 10)
# plt.title("Can't Help Falling In Love With You - Pitch shifted")
# plt.show()

plt.figure(figsize=(20, 4))
plt.imshow(np.log(np.hstack(cover_ffts))**.5, aspect = 10)
plt.title("Can't Help Falling In Love With You - Cover (ukelele)")
plt.show()

AttributeError: 'tuple' object has no attribute 'shape'