# Project

## General idea:
Given two songs, an original and one that samples it, find which part of the original was sampled and where it appears in the sampling song.

## Method:
Fingerprint: split each song into 1-second segments, then find matches among pairs of segments (one from each song).

### Part 1 
- Convert any wav to samples
- Define which music features to use (spectrogram peaks, used as fingerprint anchors)

### Part 2
- Compare music features between songs
- Check where in the song features are similar (timestamps)

Methods: audio matching with fingerprints? Dynamic time warping (DTW)?

### Part 3
- Automation: write functions so the process can be repeated with any pair of songs

### References

http://www.eurasip.org/Proceedings/Eusipco/Eusipco2012/Conference/papers/1569556475.pdf

http://cmmr2012.eecs.qmul.ac.uk/sites/cmmr2012.eecs.qmul.ac.uk/files/pdf/papers/cmmr2012_submission_19.pdf

Master's thesis: http://mtg.upf.edu/system/files/publications/Van-Balen-Jan-Master-thesis-2011_1.pdf

In [1]:
import numpy as np
import librosa
from matplotlib import pyplot as plt
import os
from scipy import signal
from scipy.fft import fft, fftshift, fftfreq

In [49]:
# Input recordings to compare.
# NOTE(review): presumably file_1 is the song containing the sample and
# file_2 the sampled original — confirm against the audio files.
file_1 = "concalma_daddyyankeesnow.wav"
file_2 = "informer_snow.wav"
# Folder joined in front of a file name when loading; "" leaves the name as is.
directory = ""
# Direct librosa loading of both files at a shared sample rate (disabled;
# loading is done inside Audio_fingerprint.__init__ instead).
# audio_1, sample_rate = librosa.load(os.path.join(directory, file_1))
# audio_2, _ = librosa.load(os.path.join(directory, file_2), sr=sample_rate)

In [46]:
class Audio_fingerprint:
    """Spectral-peak fingerprint of a single audio file.

    Creating an instance loads the file with ``librosa.load`` and immediately
    runs the whole pipeline (STFT -> anchors -> fingerprints), printing one
    progress line per stage.

    A fingerprint is a ``(time, (f1, f2, delta_t))`` tuple: ``time`` and ``f1``
    are the STFT time/frequency of an anchor peak, ``f2`` the frequency of a
    later peak paired with it, and ``delta_t`` the time between the two.
    """

    def __init__(self, file, directory=""):
        """Load ``file`` (joined onto ``directory``) and fingerprint it.

        Args:
            file: Name of the audio file to load.
            directory: Folder prepended to ``file``; "" uses ``file`` as is.
        """
        self._filename = file
        self._audio, self._sample_rate = librosa.load(os.path.join(directory, file))
        self._duration = librosa.get_duration(y=self._audio, sr=self._sample_rate)
        self.get_fingerprints()

    @property
    def fingerprint_set(self):
        """Tuple of ``(time, (f1, f2, delta_t))`` fingerprints (read-only)."""
        return self._fingerprint_set

    def get_stft(self, size_fft=8192, duration_ms=50, hopsize_ms=10):
        """Compute the dB-magnitude short-time Fourier transform of the audio.

        The results are stored on the instance as ``_stft_f`` (frequency
        axis), ``_stft_t`` (time axis), ``_stft_amp`` (complex STFT) and
        ``_stft_db`` (magnitude in dB); nothing is returned.

        Args:
            size_fft: FFT length passed to ``scipy.signal.stft`` as ``nfft``.
            duration_ms: Analysis window duration in milliseconds.
            hopsize_ms: Hop between consecutive windows in milliseconds.
        """
        window_len = int(duration_ms * self._sample_rate * 1e-3)
        hopsize_len = int(hopsize_ms * self._sample_rate * 1e-3)
        # Total zero padding is four window lengths, split evenly between the
        # start and the end of the signal.
        zeropadding_len = window_len * 4
        half_pad = zeropadding_len // 2

        # np.pad is the public entry point (np.lib.pad is a legacy alias).
        audio_zp = np.pad(self._audio, (half_pad, half_pad), 'constant', constant_values=(0, 0))

        stft_f, stft_t, stft_amp = signal.stft(
            audio_zp,
            self._sample_rate,
            nperseg=window_len,
            noverlap=window_len - hopsize_len,
            nfft=size_fft,
            boundary=None,
        )
        stft_db = librosa.amplitude_to_db(np.abs(stft_amp))

        self._stft_f, self._stft_t, self._stft_amp, self._stft_db = stft_f, stft_t, stft_amp, stft_db
        print(f"File {self._filename} STFT completed.")

    def get_anchors(self, nband=25, delta_t=0.1):
        """Pick the loudest spectrogram bin in every time/frequency box.

        Recomputes the STFT with default settings, tiles the dB spectrogram
        into ``nband`` frequency bands by time slices of roughly ``delta_t``
        (in units of the STFT time axis), and keeps the position of the
        maximum of each box.  The result is stored in ``_anchors`` as an
        integer array of shape ``(n, 2)`` holding ``(time_index,
        frequency_index)`` rows; nothing is returned.

        Args:
            nband: Number of frequency bands.
            delta_t: Width of a time slice.
        """
        import math

        self.get_stft()
        n_time = len(self._stft_t)
        duration = self._stft_t[-1] - self._stft_t[0]
        # A single-frame STFT has zero duration; fall back to one frame per
        # slice instead of dividing by zero.  Never let the step reach 0.
        if duration > 0:
            step_t = max(1, math.ceil(n_time / duration * delta_t))
        else:
            step_t = 1
        step_f = max(1, math.ceil(len(self._stft_f) / nband))
        nband_time = math.ceil(n_time / step_t)

        anchors = []
        for i in range(nband):
            f_start = i * step_f
            for j in range(nband_time):
                t_start = j * step_t
                box = self._stft_db[f_start:f_start + step_f, t_start:t_start + step_t]
                if box.size == 0:
                    # Rounding step_f up can leave the last bands empty.
                    continue
                # First maximum in row-major order, as (freq, time) offsets.
                f_off, t_off = np.unravel_index(np.argmax(box), box.shape)
                anchors.append((t_off + t_start, f_off + f_start))

        # reshape keeps the (n, 2) layout even when no anchor was found.
        self._anchors = np.array(anchors, dtype=np.int64).reshape(-1, 2)
        print(f"File {self._filename} anchors completed.")

    def get_fingerprints(self):
        """Pair every anchor with the anchors in its target zone.

        Recomputes the anchors, then for each anchor at ``(time, freq)``
        collects every anchor whose time lies in ``[time + 0.1, time + 0.6]``
        and whose frequency lies within half an octave of ``freq``.  Each
        pair yields one fingerprint ``(time, (freq, f2, delta_t))``.  The
        result is stored as a tuple in ``_fingerprint_set``; nothing is
        returned.
        """
        self.get_anchors()
        # Convert anchor indices into STFT axis values.
        max_time = self._stft_t[self._anchors[:, 0]]
        max_frequencies = self._stft_f[self._anchors[:, 1]]

        fingerprint_set = []
        for time, freq in zip(max_time, max_frequencies):
            in_zone = (
                (max_time >= time + 0.1)
                & (max_time <= time + 0.6)
                & (max_frequencies >= freq * 2**-0.5)
                & (max_frequencies <= freq * 2**0.5)
            )
            for i in np.flatnonzero(in_zone):
                hash_i = (freq, max_frequencies[i], max_time[i] - time)
                fingerprint_set.append((time, hash_i))

        self._fingerprint_set = tuple(fingerprint_set)
        print(f"File {self._filename} fingerprints completed.")

    def get_matches(self, fingerprint_q, eps=1e-5):
        """Find fingerprints of this file whose hash matches a query hash.

        Two hashes match when the summed absolute difference of their three
        components ``(f1, f2, delta_t)`` is at most ``eps``.

        Args:
            fingerprint_q: Iterable of ``(time, (f1, f2, delta_t))`` query
                fingerprints, e.g. another instance's fingerprints or the
                output of ``get_segment``.
            eps: Tolerance on the summed absolute difference.

        Returns:
            Set of ``(time_in_this_file, time_in_query)`` pairs.
        """
        matches = set()
        for fd_i in self._fingerprint_set:
            for fq_i in fingerprint_q:
                if (fd_i[0], fq_i[0]) in matches:
                    continue
                h1 = fd_i[1]
                h2 = fq_i[1]
                # Bail out as soon as the running distance exceeds eps; most
                # pairs already fail on the first component.
                d = abs(h1[0] - h2[0])
                if d > eps:
                    continue
                d += abs(h1[1] - h2[1])
                if d > eps:
                    continue
                # Compare the time deltas of BOTH hashes (h1 vs h2).
                d += abs(h1[2] - h2[2])
                if d <= eps:
                    matches.add((fd_i[0], fq_i[0]))

        return matches

    def get_segment(self, start=0.0, duration=1.0):
        """Return the fingerprints anchored inside a time window.

        Args:
            start: Start of the window, on the STFT time axis.
            duration: Length of the window.

        Returns:
            Tuple of the fingerprints whose anchor time lies in
            ``[start, start + duration]`` (both ends inclusive).
        """
        end = start + duration
        return tuple(fgp for fgp in self._fingerprint_set if start <= fgp[0] <= end)

In [47]:
# Fingerprint file_1: the constructor loads the audio and runs the
# STFT -> anchors -> fingerprints pipeline, printing one line per stage.
x = Audio_fingerprint(file_1)

File WakaWaka-Shakira.wav STFT completed.
File WakaWaka-Shakira.wav anchors completed.
File WakaWaka-Shakira.wav fingerprints completed.


In [48]:
# Fingerprints of x anchored in the default window (start=0.0, duration=1.0).
segment = x.get_segment()

In [25]:
import math

# NOTE(review): `y` is not created in any visible cell — presumably
# y = Audio_fingerprint(file_2) was run earlier; confirm before executing.
# The class stores its fingerprints in the `_fingerprint_set` attribute.
len_x = len(x._fingerprint_set)
len_y = len(y._fingerprint_set)

## Split each fingerprint tuple into two halves.
# The second half starts exactly at the midpoint so that no fingerprint is
# dropped between the two slices.
mid_x = math.ceil(len_x / 2)
mid_y = math.ceil(len_y / 2)
first_half_x = x._fingerprint_set[:mid_x]
second_half_x = x._fingerprint_set[mid_x:]
first_half_y = y._fingerprint_set[:mid_y]
second_half_y = y._fingerprint_set[mid_y:]

#print(len_x, len_y)

30010 44929


In [None]:
## Parallelisation
## Small per-pair matching function, usable as a multiprocessing worker
def matching_small(fd_i, fq_i, eps=1e-5):
    """Compare one pair of fingerprints.

    Each fingerprint is a ``(time, (f1, f2, delta_t))`` tuple.  The hashes
    match when the summed absolute difference of their three components is
    at most ``eps``.

    The function depends only on its arguments: a worker process cannot see
    a result set that is being built in the parent, so duplicates have to be
    removed by the caller (e.g. by collecting the results into a set).

    Args:
        fd_i: Fingerprint from the first song.
        fq_i: Fingerprint from the second (query) song.
        eps: Tolerance on the summed absolute difference.

    Returns:
        ``(fd_i[0], fq_i[0])`` when the hashes match, otherwise ``None``.
    """
    h1 = fd_i[1]
    h2 = fq_i[1]
    # Bail out as soon as the running distance exceeds eps; most pairs
    # already fail on the first component.
    d = abs(h1[0] - h2[0])
    if d > eps:
        return None
    d += abs(h1[1] - h2[1])
    if d > eps:
        return None
    # Compare the time deltas of BOTH hashes (h1 vs h2).
    d += abs(h1[2] - h2[2])
    if d <= eps:
        return (fd_i[0], fq_i[0])
    return None


import multiprocessing as mp
from itertools import product


def _match_pair(pair):
    """Unpack one (fd_i, fq_i) pair; Pool.imap* hands workers a single argument."""
    return matching_small(*pair)


# NOTE(review): worker functions must be importable by the child processes;
# with the "spawn" start method (Windows/macOS default) functions defined in a
# notebook cell may not be — confirm on the target platform.
with mp.Pool() as p:
    # product() yields the pairs lazily instead of building the whole
    # cross-product as a list.  The results are drained INSIDE the `with`
    # block because imap* is lazy and the pool is terminated on exit.
    # Collecting into a set removes duplicate (time, time) pairs.
    matches = {
        hit
        for hit in p.imap_unordered(
            _match_pair, product(first_half_x, first_half_y), chunksize=10_000
        )
        if hit is not None
    }
