In [None]:
import numpy as np
from pydub import AudioSegment
from scipy.signal import spectrogram
from skimage.feature import peak_local_max
from skimage.measure import compare_ssim
from datasketch import MinHash, MinHashLSH
import pyaudio
from bokeh.io import show, output_notebook
from bokeh.plotting import figure 
import warnings
import sys

# Silence every warning category for the rest of the kernel session.
warnings.simplefilter('ignore')
# Route bokeh's show() output into the notebook cells.
output_notebook()

In [None]:
class AudioFP():
    """Audio fingerprint built from the spectrogram peaks of a song or recording.

    The intended call order is ``generate_spectrogram`` -> ``find_peaks`` ->
    ``generate_fingerprint``; every step stores its result on the instance.

    Attributes:
        songname: file name (read mode) or the label typed by the user (record mode).
        channels: one sequence of samples per audio channel.
        framerate: sampling rate in Hz.
        f, t, sgram: frequency bins, segment times and spectrogram in dB
            (set by ``generate_spectrogram``).
        peaks, tp, fp: amplitude, time and frequency of the detected peaks,
            ordered by time (set by ``find_peaks``).
        fingerprint: ``MinHash`` of the anchor/target peak pairs
            (set by ``generate_fingerprint``).
    """

    ## Initializing AudioFP object properties
    def __init__(self):
        """Ask for the audio source, then read or record it and plot the signal.

        Exits through ``sys.exit`` when the answer is neither "r" nor "a".
        """
        audio_type = input('Do you wish to read from a file or record? Enter "r" to read and "a" to record: ')
        if audio_type not in ('r', 'a'):
            sys.exit('Error: Incorrect entry. Enter "r" to read an audio file or "a" to record')
        if audio_type == 'r':
            prompt = 'Enter the filename you want to read including the extension: '
        else:
            prompt = 'Enter a name for the recording:'
        self.songname = input(prompt)
        self.channels = []
        self.framerate = []
        if audio_type == 'r':
            self.read_audiofile(plot=True)
        else:
            self.record_audiofile(plot=True)

    ## Plot every `step`-th sample of the first channel against time
    def _plot_signal(self, step):
        """Show a line plot of the first channel, decimated by ``step``."""
        # width/height instead of plot_width/plot_height, which Bokeh 3 rejects
        p1 = figure(width=900, height=500, title='Audio Signal',
                    x_axis_label='Time (s)', y_axis_label='Amplitude (arb. units)')
        nsamples = len(self.channels[0])
        time = np.linspace(0, nsamples / self.framerate, nsamples)
        p1.line(time[0::step], self.channels[0][0::step])
        show(p1)

    ## Build the spectrogram image figure shared by the spectrogram and peak plots
    def _spectrogram_figure(self, title):
        """Return (without showing) a figure holding the spectrogram image.

        Args:
            title: figure title.
        """
        t_min, t_max = self.t.min(), self.t.max()
        f_min, f_max = self.f.min(), self.f.max()
        fig = figure(width=900, height=500, title=title,
                     x_axis_label='Time (s)', y_axis_label='Frequency (Hz)',
                     x_range=(t_min, t_max), y_range=(f_min, f_max))
        # dw/dh are extents measured from (x, y), hence max - min and not max
        fig.image([self.sgram[::2, ::2]], x=t_min, y=f_min,
                  dw=t_max - t_min, dh=f_max - f_min, palette='Spectral11')
        return fig

    ## Read audio file using pydub and plot signal
    def read_audiofile(self, plot):
        """Load ``self.songname`` with pydub and split it into channels.

        Sets ``self.channels`` (one array per channel) and ``self.framerate``.

        Args:
            plot: when true, plot the first channel.
        """
        audiofile = AudioSegment.from_file(self.songname)
        # get_array_of_samples() follows the file's sample width, so the data is
        # decoded correctly even when it is not 16-bit
        songdata = np.array(audiofile.get_array_of_samples())
        nchannels = audiofile.channels
        # Samples are interleaved; rebuild the list so a second call does not
        # pile new channels on top of the old ones
        self.channels = [songdata[chn::nchannels] for chn in range(nchannels)]
        self.framerate = audiofile.frame_rate
        # Plot time signal
        if plot:
            self._plot_signal(step=1000)

    ## Record audio file using pyaudio and plot signal
    def record_audiofile(self, plot):
        """Record stereo 16-bit audio at 44.1 kHz from the default input device.

        Prompts for the duration and for a confirmation; exits through
        ``sys.exit`` unless the confirmation is "y". Sets ``self.channels``
        (one array per channel) and ``self.framerate``.

        Args:
            plot: when true, plot the first channel.
        """
        rec_time = int(input('How long do you want to record? Enter time in seconds: '))
        start_rec = input('Do you want to start recoding? Enter "y" to start:')
        if start_rec != 'y':
            sys.exit('Audio recording did not start. Start over again.')
        chk_size = 8192  # chunk size
        fmt = pyaudio.paInt16  # format of audio
        chan = 2  # Number of channels
        samp_rate = 44100  # sampling rate
        self.framerate = samp_rate
        chunks = []
        p = pyaudio.PyAudio()
        try:
            astream = p.open(format=fmt, channels=chan, rate=samp_rate,
                             input=True, frames_per_buffer=chk_size)
            try:
                # builtin int: the np.int alias no longer exists in current NumPy
                for _ in range(int(samp_rate / chk_size * rec_time)):
                    chunks.append(np.frombuffer(astream.read(chk_size), dtype=np.int16))
            finally:
                # Close audio stream even when a read fails
                astream.stop_stream()
                astream.close()
        finally:
            p.terminate()
        samples = np.concatenate(chunks) if chunks else np.empty(0, dtype=np.int16)
        # Samples are interleaved, one value per channel in turn
        self.channels = [samples[c::chan] for c in range(chan)]
        # Plot time signal
        if plot:
            self._plot_signal(step=100)

    ## Generate and plot spectrogram of audio data
    def generate_spectrogram(self, plot):
        """Compute the dB-scaled spectrogram of the channel-averaged signal.

        Sets ``self.f`` (Hz), ``self.t`` (s) and ``self.sgram`` (dB, indexed
        [frequency, time]). Bins with zero power are stored as 0 instead of -inf.

        Args:
            plot: when true, show the spectrogram.
        """
        audiosignal = np.sum(self.channels, axis=0) / len(self.channels)  # Averaging signal over all channels
        fs = self.framerate  # sampling rate
        window = 'hamming'  # window function
        nperseg = 10 * 256  # window size
        overlap_ratio = 0.5  # degree of overlap, larger number->more overlap, denser fingerprint
        noverlap = int(overlap_ratio * nperseg)  # number of points to overlap
        # generate spectrogram from consecutive FFTs over the defined window
        self.f, self.t, power = spectrogram(audiosignal, fs=fs, window=window,
                                            nperseg=nperseg, noverlap=noverlap)
        with np.errstate(divide='ignore'):  # log10(0) is handled on the next line
            self.sgram = 10 * np.log10(power)  # transform linear output to dB scale
        self.sgram[self.sgram == -np.inf] = 0  # replace infs with zeros
        # Plot Spectrogram
        if plot:
            show(self._spectrogram_figure('Spectrogram'))

    ## Find peaks in the spectrogram using image processing
    def find_peaks(self, plot):
        """Locate local maxima of ``self.sgram`` and store them ordered by time.

        Sets ``self.peaks`` (dB), ``self.tp`` (s) and ``self.fp`` (Hz).

        Args:
            plot: when true, show the spectrogram with the peaks overlaid.
        """
        min_peak_sep = 20  # larger sep -> less peaks -> less accuracy, but faster fingerprinting
        min_peak_amp = 15  # larger min amp -> less peaks -> less accuracy, but faster fingerprinting
        # Coordinates are the default return value; the old `indices=True`
        # keyword is not accepted by current scikit-image releases
        coordinates = peak_local_max(self.sgram, min_distance=min_peak_sep,
                                     threshold_abs=min_peak_amp)
        self._store_peaks(coordinates)
        # Plot the peaks detected on the spectrogram
        if plot:
            p3 = self._spectrogram_figure('Spectrogram with Peaks')
            p3.scatter(self.tp, self.fp)
            show(p3)

    ## Store peak data sorted by time so that "next peaks" means "later peaks"
    def _store_peaks(self, coordinates):
        """Fill ``peaks``/``tp``/``fp`` from (frequency index, time index) rows.

        The rows are reordered by time index, then frequency index, because
        ``generate_fingerprint`` pairs each peak with the ones stored after it
        and the detector's own output order is not a time order.
        """
        coordinates = np.asarray(coordinates, dtype=int).reshape(-1, 2)
        order = np.lexsort((coordinates[:, 0], coordinates[:, 1]))  # last key is the primary one
        rows = coordinates[order, 0]
        cols = coordinates[order, 1]
        self.peaks = self.sgram[rows, cols]
        self.tp = self.t[cols]
        self.fp = self.f[rows]

    ## Use the peak data from the spectrogram to generate a string with pairs of
    ## peak frequencies and the time delta between them
    def generate_fingerprint(self, plot):
        """Hash the anchor/target peak pairs into ``self.fingerprint`` (a MinHash).

        Args:
            plot: when true, print the song name and the MinHash digest.
        """
        self.fingerprint = MinHash(num_perm=128)
        for data in self._fingerprint_tokens():
            self.fingerprint.update(data.encode('utf8'))
        if plot:
            print('{} audio-fingprint: '.format(self.songname))
            print(self.fingerprint.digest())

    ## Build the strings that are fed to the MinHash
    def _fingerprint_tokens(self):
        """Return one string per accepted anchor/target pair of peaks.

        Each peak (anchor) is paired with up to ``peak_connectivity - 1`` of the
        peaks stored after it (targets). A pair is kept when the target is
        between ``peak_time_delta_min`` and ``peak_time_delta_max`` seconds
        later; its string is the rounded anchor frequency, target frequency and
        time delta concatenated.
        """
        peak_connectivity = 5  # Each anchor is paired with up to peak_connectivity - 1 following peaks
        peak_time_delta_min = 0  # Minimum spacing in time between peaks for anchor and target
        peak_time_delta_max = 20  # Maximum spacing in time between peaks for anchor and target
        tokens = []
        npeaks = len(self.peaks)
        for i in range(npeaks):
            for j in range(i + 1, min(i + peak_connectivity, npeaks)):
                t_delta = self.tp[j] - self.tp[i]
                if peak_time_delta_min <= t_delta <= peak_time_delta_max:
                    tokens.append(str(np.rint(self.fp[i])) + str(np.rint(self.fp[j]))
                                  + str(np.rint(t_delta)))
        return tokens

In [None]:
## Compare fingerprints of two songs and report whether they match
def compare_fingerprints(s1, s2, cmp_type, sim_threshold=0.5):
    """Check with MinHash LSH whether two fingerprints are likely to match.

    ``s1`` is inserted into a fresh ``MinHashLSH`` index which is then queried
    with ``s2``; a non-empty query result counts as a match. The outcome is
    printed and also returned.

    Args:
        s1: MinHash fingerprint of the first song (built with num_perm=128).
        s2: MinHash fingerprint of the second song (built with num_perm=128).
        cmp_type: currently unused; kept so existing calls keep working.
        sim_threshold: Jaccard similarity threshold passed to the LSH index.

    Returns:
        True when the query with ``s2`` returns ``s1``'s key, else False.
    """
    lsh = MinHashLSH(threshold=sim_threshold, num_perm=128)
    lsh.insert('song1', s1)
    is_match = bool(lsh.query(s2))
    if is_match:
        print('Match, Jaccard similarity > {}'.format(sim_threshold))
    else:
        print('Not a match, Jaccard similarity < {}'.format(sim_threshold))
    return is_match

In [None]:
# Create the first song object. The constructor prompts for the source: "r" reads
# an audio file (enter the full file name including the extension), "a" records
# from the microphone. The audio signal is plotted once it has been loaded.
song1 = AudioFP()

In [None]:
# Use the audio signal loaded above to generate and plot song1's spectrogram
song1.generate_spectrogram(plot=True)

In [None]:
# Find local maxima in the spectrogram and plot them on top of it;
# these peaks are the input for the audio fingerprint
song1.find_peaks(plot=True)

In [None]:
# Use the peaks in the spectrogram to generate the MinHash fingerprint of the
# audio signal and print its digest
song1.generate_fingerprint(plot=True)

In [None]:
# Create the second song object (prompts again for a file or a recording) and
# run the same spectrogram -> peaks -> fingerprint steps without the plots
song2 = AudioFP()
song2.generate_spectrogram(plot=False)
song2.find_peaks(plot=False)
song2.generate_fingerprint(plot=False)

In [None]:
# Print whether the two fingerprints match; the third argument ('lsh') is not
# used by compare_fingerprints
compare_fingerprints(song1.fingerprint, song2.fingerprint, 'lsh')