# Project

## General idea:
Given two songs — an original and one that samples it — find which part of the original was taken and where it was placed in the new song.

## Method:
Fingerprint: split the music into 1-second segments, then find matches among pairs of segments.

### Part 1 
- Convert any wav to samples
- Select the music features to use (e.g. spectral peaks of the STFT)

### Part 2
- Compare music features between songs
- Check where in the song features are similar (timestamps)

Methods: audio matching with fingerprints? Dynamic time warping (DTW)?

### Part 3
- Automation: write functions so the process can be repeated with any pair of songs

### References

http://www.eurasip.org/Proceedings/Eusipco/Eusipco2012/Conference/papers/1569556475.pdf

http://cmmr2012.eecs.qmul.ac.uk/sites/cmmr2012.eecs.qmul.ac.uk/files/pdf/papers/cmmr2012_submission_19.pdf

Master thesis: http://mtg.upf.edu/system/files/publications/Van-Balen-Jan-Master-thesis-2011_1.pdf

In [18]:
import numpy as np
import librosa
from matplotlib import pyplot as plt
import os
from scipy import signal
from scipy.fft import fft, fftshift, fftfreq

In [15]:
# Input recordings: both files are looked up inside `directory`
# (an empty string means the current working directory).
directory = ""
file_1, file_2 = "WakaWaka-Shakira.wav", "ZaminaWakaWaka.wav"

# The first file fixes the sample rate; the second one is loaded with that
# same rate passed as `sr` so both signals share one time base.
audio_1, sample_rate = librosa.load(os.path.join(directory, file_1))
audio_2, _ = librosa.load(os.path.join(directory, file_2), sr=sample_rate)

In [None]:
def load_audio(file, directory=""):
    """Load an audio file with librosa and report its duration.

    Parameters
    ----------
    file : str
        Name of the audio file.
    directory : str, optional
        Folder containing ``file``; joined to it with ``os.path.join``.

    Returns
    -------
    tuple
        ``(audio, sample_rate, duration)`` where ``audio`` and ``sample_rate``
        are what ``librosa.load`` returns and ``duration`` is the value of
        ``librosa.get_duration`` for that signal.
    """
    path = os.path.join(directory, file)
    samples, rate = librosa.load(path)
    return samples, rate, librosa.get_duration(y=samples, sr=rate)

In [52]:
def get_stft(query, sample_rate, size_fft=8192, duration_ms=50, hopsize_ms=10):
    """Compute the short-time Fourier transform of a signal and its dB magnitude.

    Parameters
    ----------
    query : array_like
        One-dimensional audio signal.
    sample_rate : int or float
        Sampling rate of ``query`` in Hz.
    size_fft : int, optional
        FFT length handed to ``scipy.signal.stft`` as ``nfft``.
    duration_ms : float, optional
        Analysis window length in milliseconds.
    hopsize_ms : float, optional
        Hop between consecutive windows in milliseconds.

    Returns
    -------
    stft_f, stft_t, stft_amp, stft_db
        The frequency axis, time axis and STFT values returned by
        ``scipy.signal.stft``, followed by the magnitude of the STFT converted
        with ``librosa.amplitude_to_db``.
    """
    # Window and hop lengths expressed in samples.
    window_len = int(duration_ms * sample_rate * 1e-3)
    hopsize_len = int(hopsize_ms * sample_rate * 1e-3)

    # The signal itself is padded with zeros: two window lengths on each side
    # (four window lengths in total).
    zeropadding_len = window_len * 4
    half_pad = zeropadding_len // 2

    # np.pad is the public entry point; np.lib.pad is not available in NumPy 2.x.
    query = np.pad(query, (half_pad, half_pad), 'constant', constant_values=(0, 0))

    stft_f, stft_t, stft_amp = signal.stft(
        query,
        sample_rate,
        nperseg=window_len,
        noverlap=window_len - hopsize_len,
        nfft=size_fft,
        boundary=None,
    )
    stft_db = librosa.amplitude_to_db(np.abs(stft_amp))
    return stft_f, stft_t, stft_amp, stft_db

In [49]:
def get_anchors(stft_f, stft_t, stft_db, sample_rate, nband=25, delta_t=0.1):
    """Pick the strongest spectrogram bin in every cell of a time/frequency grid.

    The spectrogram is tiled into boxes: the frequency axis is cut into (at
    most) ``nband`` bands of equal bin count and the time axis into slices of
    roughly ``delta_t`` (in the units of ``stft_t``). The position of the
    maximum of each box becomes an anchor.

    Parameters
    ----------
    stft_f : sequence
        Frequency axis; only its length is used.
    stft_t : sequence
        Time axis; its length and its first/last values are used.
    stft_db : array_like
        2-D spectrogram indexed as ``[frequency, time]``.
    sample_rate : number
        Unused; kept so existing callers keep working.
    nband : int, optional
        Requested number of frequency bands.
    delta_t : float, optional
        Requested duration of one time slice.

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``(n_anchors, 2)`` whose rows are
        ``(time_index, frequency_index)``, ordered band by band and, within a
        band, slice by slice.
    """
    import math

    stft_db = np.asarray(stft_db)
    n_freq, n_time = len(stft_f), len(stft_t)
    if n_freq == 0 or n_time == 0:
        # Nothing to tile: return an empty, correctly shaped result.
        return np.empty((0, 2), dtype=np.intp)

    duration = stft_t[-1] - stft_t[0]
    if duration > 0:
        step_t = max(1, math.ceil(n_time / duration * delta_t))
    else:
        # A single frame has no duration; treat every frame as its own slice.
        step_t = 1
    step_f = math.ceil(n_freq / nband)

    # Derive both box counts from the step sizes. Because the steps are
    # rounded up, iterating blindly over `nband` bands could run past the last
    # frequency bin and hit an empty box, on which the maximum is undefined.
    n_freq_boxes = math.ceil(n_freq / step_f)
    n_time_boxes = math.ceil(n_time / step_t)

    anchors = []
    for i in range(n_freq_boxes):
        f_lo = i * step_f
        for j in range(n_time_boxes):
            t_lo = j * step_t
            box = stft_db[f_lo:f_lo + step_f, t_lo:t_lo + step_t]
            # First maximum in row-major order, as (row, column) inside the box.
            f_off, t_off = np.unravel_index(np.argmax(box), box.shape)
            anchors.append((t_lo + t_off, f_lo + f_off))

    return np.array(anchors, dtype=np.intp).reshape(-1, 2)

In [None]:
def get_fingerprints(stft_f, stft_t, anchors):
    """Build fingerprints by pairing each anchor with the anchors in its target zone.

    For an anchor at ``(time, freq)`` the target zone contains every anchor
    whose time lies in ``[time + 0.1, time + 0.6]`` and whose frequency lies
    within half an octave of ``freq`` (``freq * 2**-0.5`` to ``freq * 2**0.5``).

    Parameters
    ----------
    stft_f : array_like
        Frequency axis used to translate frequency indices into values.
    stft_t : array_like
        Time axis used to translate time indices into values.
    anchors : array_like
        Integer rows of ``(time_index, frequency_index)``.

    Returns
    -------
    tuple
        One ``(time, (freq, f2, delta_t))`` entry per anchor/target pair, where
        ``time`` and ``freq`` belong to the base anchor, ``f2`` is the target
        frequency and ``delta_t`` the time gap between the two.
    """
    anchors = np.asarray(anchors)
    if anchors.size == 0:
        return ()
    stft_f = np.asarray(stft_f)
    stft_t = np.asarray(stft_t)

    # Translate index pairs into physical values: column 0 is the time index,
    # column 1 the frequency index.
    max_time = stft_t[anchors[:, 0]]
    max_frequencies = stft_f[anchors[:, 1]]

    fingerprint_set = []
    for time, freq in zip(max_time, max_frequencies):
        in_time = (max_time >= time + 0.1) & (max_time <= time + 0.6)
        in_freq = (max_frequencies >= freq * 2**-0.5) & (max_frequencies <= freq * 2**0.5)
        # Indices of anchors inside both the time and the frequency window.
        for i in np.flatnonzero(in_time & in_freq):
            hash_i = (freq, max_frequencies[i], max_time[i] - time)
            fingerprint_set.append((time, hash_i))

    return tuple(fingerprint_set)

In [None]:
def get_matches(fingerprint_d, fingerprint_q, eps=1e-5):
    """Find pairs of time stamps whose fingerprint hashes agree within ``eps``.

    Parameters
    ----------
    fingerprint_d : iterable
        Fingerprints of the first song, each ``(time, (f1, f2, delta_t))``.
    fingerprint_q : iterable
        Fingerprints of the second song, in the same layout.
    eps : float, optional
        Upper bound on the summed absolute difference of the three hash
        components for two hashes to count as equal.

    Returns
    -------
    set
        ``(time_d, time_q)`` tuples, one for every pair of time stamps that
        has at least one matching hash.
    """
    matches = set()
    for fd_i in fingerprint_d:
        time_d, hash_d = fd_i[0], fd_i[1]
        for fq_i in fingerprint_q:
            pair = (time_d, fq_i[0])
            if pair in matches:
                continue
            hash_q = fq_i[1]
            # Accumulate the distance component by component and bail out as
            # soon as it exceeds eps; most candidates fail on the first one.
            d = abs(hash_d[0] - hash_q[0])
            if d > eps:
                continue
            d += abs(hash_d[1] - hash_q[1])
            if d > eps:
                continue
            # Compare the time deltas of BOTH hashes (d-side against q-side).
            d += abs(hash_d[2] - hash_q[2])
            if d <= eps:
                matches.add(pair)

    return matches