In [191]:
import librosa
import numpy as np
import scipy
from sklearn.neighbors import NearestNeighbors
import pydub
import math

In [192]:
#replace with the name of the input file
original_wav = "furelise.wav"

In [222]:
songs_wav = ["furelise1.wav", "furelise2.wav", "furelise3.wav", "furelise4.wav", 
             "moonlight1.wav", "moonlight2.wav", "moonlight3.wav", "moonlight4.wav", 
             "metal1.wav", "metal2.wav", "metal3.wav", "metal4.wav", 
            "skrillex.wav"]

In [223]:
original, sample_rate = librosa.load("songs/" + original_wav)

In [224]:
song_data = [original] + [librosa.load("songs/" + song)[0] for song in songs_wav]

In [225]:
def remove_zeros(vec):
    temp = np.transpose(vec == 0)
    indices = np.argwhere(temp == False)
    return vec[indices[0][0]:indices[len(indices) - 1][0]]

In [226]:
songs = [remove_zeros(s) for s in song_data] 

In [227]:
fft_frame_size = 2000

In [228]:
def get_fft_chunks(time_data):
    num_samples= len(time_data)//fft_frame_size
    return [np.fft.fft(time_data[i*fft_frame_size:(i+1)*fft_frame_size]) for i in range(num_samples)]

In [229]:
# original_fft = get_fft_chunks(remove_zeros(songs[0]))

In [230]:
#testing the stackoverflow code
#2000 is the length of one window
freq = np.fft.fftfreq(fft_frame_size)
fft_freqs = [abs(freq[i]*sample_rate) for i in range(fft_frame_size)]

In [231]:
fft_freqs

[0.0,
 11.025,
 22.050000000000001,
 33.075000000000003,
 44.100000000000001,
 55.125,
 66.150000000000006,
 77.174999999999997,
 88.200000000000003,
 99.225000000000009,
 110.25,
 121.27499999999999,
 132.30000000000001,
 143.32500000000002,
 154.34999999999999,
 165.375,
 176.40000000000001,
 187.42500000000001,
 198.45000000000002,
 209.47499999999999,
 220.5,
 231.52500000000001,
 242.54999999999998,
 253.57499999999999,
 264.60000000000002,
 275.625,
 286.65000000000003,
 297.67500000000001,
 308.69999999999999,
 319.72500000000002,
 330.75,
 341.77499999999998,
 352.80000000000001,
 363.82500000000005,
 374.85000000000002,
 385.87500000000006,
 396.90000000000003,
 407.92499999999995,
 418.94999999999999,
 429.97500000000002,
 441.0,
 452.02500000000003,
 463.05000000000001,
 474.07500000000005,
 485.09999999999997,
 496.125,
 507.14999999999998,
 518.17499999999995,
 529.20000000000005,
 540.22500000000002,
 551.25,
 562.27500000000009,
 573.30000000000007,
 584.32499999999993,


In [232]:
ranges = [40, 80, 120, 180, 300]

In [233]:
def get_magnitudes(fft):
    #return high_mags, a 2d array
    #high_mags[i][0] is for 0-40
    #high_mags[i][1] is for 40-80
    #etc. where i is in range(len(fft)), or each fft window 
    high_mags = [np.zeros(len(ranges)) for k in range(len(fft))]
    
    def max_mag_in_window(index, high_mags):
        nonlocal i
        #tuple in form (current highest magnitude, index of current highest magnitude)
        mag = (0, 0)
        while(fft_freqs[i] < ranges[index]): 
            curr_mag = math.log10(abs(window[i])+1)
            if curr_mag > mag[0]:
                mag = (curr_mag, i)
            i += 1
            high_mags[fft_window][index] = fft_freqs[mag[1]]
            
    for fft_window in range(len(fft)):
        window = fft[fft_window]
        i = 0
        #find the maximum magnitudes in each window of ranges (0-40, 40-80, etc.)
        for j in range(len(ranges)):
            max_mag_in_window(j, high_mags)
    return high_mags

In [234]:
#a key is a set of 5 magnitudes that are the greatest in the ranges 
#0-40, 40-80, 80-120, 120-180, and 180-300 Hz
#the values are an array of tuples of type (time, song name)
#where time is the "window" (an index) and song is a string such as "furelise"

def populate_database(mags, database, song_name):
    for i in range(len(mags)): #i = index of "window" so it corresponds to what we can consider "time" i suppose
        key = str(mags[i])
        if key not in database:
            database[key] = {}
        if song_name not in database[key]:
            database[key][song_name] = []
        database[key][song_name].append(i)
    

In [235]:
database = {}
#songs[0] is the original/input, we want to process it separately
for index, song in enumerate(songs[1:]):
    print("adding song ", songs_wav[index][:-4], "...")
    
    original_fft = get_fft_chunks(song)
    mags = get_magnitudes(original_fft)
    populate_database(mags, database, songs_wav[index][:-4])
    

adding song  furelise1 ...
adding song  furelise2 ...
adding song  furelise3 ...
adding song  furelise4 ...
adding song  moonlight1 ...
adding song  moonlight2 ...
adding song  moonlight3 ...
adding song  moonlight4 ...
adding song  metal1 ...
adding song  metal2 ...
adding song  metal3 ...
adding song  metal4 ...
adding song  skrillex ...


In [236]:
#process original/input song 
original_fingerprint = {}
original_fft = get_fft_chunks(original)
mags = get_magnitudes(original_fft)
populate_database(mags, original_fingerprint, original_wav)

In [237]:
similarities = {key[:-4]:0 for key in songs_wav}

In [238]:


print(database['[  33.075   44.1    110.25   121.275  231.525]'])

{'furelise1': [0, 1673, 2356], 'furelise2': [959, 1511], 'furelise3': [1524], 'furelise4': [1299], 'moonlight3': [747], 'metal1': [23, 28], 'skrillex': [286, 322, 624, 811, 875, 1270, 1273, 1295, 1296, 1332, 1521, 1613]}


In [239]:
#for each set of 5 notes in the original/input song, check if in other songs
            
for key in original_fingerprint.keys():
    if key in database:
        for song_name in database[key]:
            similarities[song_name] += len(database[key][song_name])

In [240]:
similarities

{'furelise1': 1200,
 'furelise2': 1248,
 'furelise3': 1145,
 'furelise4': 955,
 'metal1': 58,
 'metal2': 36,
 'metal3': 106,
 'metal4': 84,
 'moonlight1': 900,
 'moonlight2': 520,
 'moonlight3': 1187,
 'moonlight4': 750,
 'skrillex': 494}

In [241]:
def knn(k, sim_dict):
    sorted_dict = sorted(sim_dict, key=sim_dict.get, reverse=True)[:k]
    counts = {}
    for s in sorted_dict:
        name = s[:-1]
        if name not in counts:
            counts[name] = 0
        counts[name] += 1
    return max(counts, key=counts.get)

knn(4, similarities)

'furelise'

In [242]:
print(max(similarities, key=similarities.get))

furelise2


In [243]:
# similarities = {key[:-4]:0 for key in songs_wav}

In [244]:
# for key in original_fingerprint.keys():
#     original_indices = original_fingerprint[key][original_wav]
#     if key in database:
#         for song_name in database[key]:
#             database_indices = database[key][song_name]
#             differences = [original - database for original, database in zip(original_indices, database_indices)]
#             relative_differences = np.mean(np.array(differences))
#             sim = np.linalg.norm(np.array(differences - relative_differences))
#             if sim < 100:
#                 similarities[song_name] += 1
#             elif sim < 1000:
#                 similarities[song_name] += 0.5
#             else:
#                 similarities[song_name] += 0.1


In [217]:
# print(similarities)

In [218]:
# print(max(similarities, key=similarities.get))