### Generate MP3ToVec embedding for 30s samples of Spotify songs

In [None]:
import keras
from keras.models import load_model
import os
import numpy as np
import librosa
import pickle
from tqdm import tqdm
import requests
import pickle
import csv
from IPython.display import clear_output

import gensim
from gensim.models.callbacks import CallbackAny2Vec
class logger(CallbackAny2Vec):
    """Empty gensim callback subclass; it overrides no hooks.

    NOTE(review): presumably this name only has to exist so that
    ``Word2Vec.load`` can restore a model that was saved with a callback
    called ``logger`` -- confirm against the training notebook.
    """
    
# Word2Vec model whose vocabulary is looked up by track id further down;
# `embedding` is its raw vector matrix, indexed by vocabulary position.
embedding_model = gensim.models.Word2Vec.load('word2vec.model')
embedding = embedding_model.wv.syn0

# Build the path with os.path.join instead of the literal '..\speccy_model':
# "\s" is an invalid escape sequence (a warning today, an error in future
# Python versions) and the backslash only works as a separator on Windows.
# On Windows the joined path is the same string as before.
model = load_model(os.path.join('..', 'speccy_model'))
batch_size = 100        # number of tracks downloaded and embedded per batch
sr         = 22050      # sample rate (Hz) used for loading audio and for the spectrogram
n_fft      = 2048       # FFT window size passed to the mel spectrogram
hop_length = 512        # hop between successive spectrogram frames, in samples
# The spectrogram slice geometry is dictated by the model's input layer.
n_mels     = model.layers[0].input_shape[1]
slice_size = model.layers[0].input_shape[2]
slice_time = slice_size * hop_length / sr   # duration of one slice in seconds
# Two vectors whose cosine distance is below this are counted as a match
# when the TF and IDF weights are computed.
epsilon_distance = 0.001

# Resume from the results of a previous run when both caches can be read;
# otherwise start from scratch. Both dicts are reset together so they never
# get out of step with each other.
# NOTE: pickle executes arbitrary code on load -- only point these paths at
# files written by this script.
try:
    # key will be Spotify id
    with open('../Pickles_Spotify/spotifytovec.p', 'rb') as pickle_file:
        mp3tovecs = pickle.load(pickle_file)
    with open('../Pickles_Spotify/tracktovec.p', 'rb') as pickle_file:
        tracktovecs = pickle.load(pickle_file)
except (OSError, EOFError, pickle.UnpicklingError):
    # Missing, unreadable or truncated cache. A bare `except:` here would
    # also swallow KeyboardInterrupt/SystemExit, and would silently discard
    # the cache on unrelated errors even though it is overwritten later.
    mp3tovecs = {}
    tracktovecs = {}
# Collect the tracks that still need an embedding as
# (id, display label, download URL) tuples.
tracks = []
with open('popular_tracks.csv', "r", encoding='utf-8') as csvfile:
    spamreader = csv.reader(csvfile, delimiter=';')
    for row in spamreader:
        # Re-join and split on ';' so the columns come out the same whether
        # the reader returned the line as one quoted field or already split.
        # Going through str(row) instead would pass every field through
        # repr(), corrupting text that contains backslashes, mixed quotes or
        # non-printable characters such as U+00A0.
        columns = ';'.join(row).split(';')
        if len(columns) < 4:
            continue  # blank or malformed line: nothing to download
        track_id, url = columns[0], columns[3]
        # Skip tracks that are already embedded and rows without an https URL.
        if track_id not in mp3tovecs and url.startswith('https'):
            # columns 1 and 2 are presumably artist and title -- TODO confirm
            tracks.append((track_id, columns[1] + ' - ' + columns[2], url))
num_done = len(mp3tovecs)
# Process the outstanding tracks in random order.
indices = np.random.permutation(len(tracks))

def _normalized_db(power_slice):
    """Return a power mel-spectrogram slice in dB, min-max scaled to [0, 1].

    The dB conversion is relative to the slice's own peak. A slice with no
    dynamic range (max == min) is returned in dB without scaling, which
    avoids a division by zero.
    """
    log_s = librosa.power_to_db(power_slice, ref=np.max)
    lowest, highest = np.min(log_s), np.max(log_s)
    if highest - lowest != 0:
        log_s = (log_s - lowest) / (highest - lowest)
    return log_s


# Main loop. For every batch of tracks:
#   1. download each sample and run its spectrogram slices through the model,
#   2. compute pairwise cosine distances between all slice vectors of the batch,
#   3. combine each track's slice vectors into one TF-IDF weighted vector,
#   4. save the accumulated results so an interrupted run can be resumed.
for batch_num in range(len(indices)//batch_size + 1):
    clear_output(True)
    print(f'Batch {batch_num + 1 + num_done//batch_size} out of {len(indices)//batch_size + 1 + num_done//batch_size}')
    # Slicing clamps at the end of the array, so the final batch may be short
    # (or empty).
    batch = [tracks[idx] for idx in indices[batch_num * batch_size:(batch_num + 1) * batch_size]]
    mp3s = {}  # Spotify id -> model output, one row per spectrogram slice
    # The `with` block closes the progress bar on every exit path, including
    # a keyboard interrupt, so no explicit close()/re-raise is needed.
    with tqdm(batch) as t:
        for track in t:
            r = requests.get(track[2], allow_redirects=True)
            # Close the file before librosa opens it.
            with open('temp.mp3', 'wb') as mp3_file:
                mp3_file.write(r.content)
            try:
                # Pass sr explicitly and discard the returned rate so the
                # module-level `sr` is not rebound on every track.
                y, _ = librosa.load('temp.mp3', sr=sr, mono=True)
                if y.shape[0] < slice_size:
                    print(f'Skipping {track[1]}')
                    continue
                S = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=n_fft, hop_length=hop_length, n_mels=n_mels, fmax=sr/2)
                num_whole_slices = S.shape[1] // slice_size
                if num_whole_slices == 0:
                    # Fewer frames than one slice: the tail slice below could
                    # not be filled.
                    print(f'Skipping {track[1]}')
                    continue
                # hack because Spotify samples are a shade under 30s:
                # one extra slot holds a slice aligned to the end of the clip
                x = np.empty((num_whole_slices + 1, n_mels, slice_size, 1), dtype=float)
                for slice_num in range(num_whole_slices):
                    x[slice_num, :, :, 0] = _normalized_db(S[:, slice_num * slice_size:(slice_num + 1) * slice_size])
                x[-1, :, :, 0] = _normalized_db(S[:, -slice_size:])
            except Exception:
                # Best effort: an undecodable download just skips the track.
                # `Exception` (not a bare except) lets Ctrl-C stop the run.
                print(f'Skipping {track[1]}')
                continue
            mp3s[track[0]] = model.predict(x)

    # Flatten all slice vectors of the batch into one list of unit vectors and
    # remember which positions belong to which track.
    mp3_vecs = []
    mp3_indices = {}
    for mp3 in mp3s:
        mp3_indices[mp3] = []
        for mp3_vec in mp3s[mp3]:
            mp3_indices[mp3].append(len(mp3_vecs))
            mp3_vecs.append(mp3_vec / np.linalg.norm(mp3_vec)) # normalize
    num_mp3_vecs = len(mp3_vecs)

    print('Precalculating cosine distances')
    if num_mp3_vecs:
        # The vectors are unit length, so cosine distance is 1 - dot product;
        # one matrix product replaces the pairwise Python loop. Mirroring the
        # strict upper triangle keeps the matrix exactly symmetric with a zero
        # diagonal. Stored as float16; the intermediates are float-sized
        # (n x n), which is small at batch scale.
        vec_matrix = np.stack(mp3_vecs)
        upper = np.triu(1 - vec_matrix @ vec_matrix.T, k=1)
        cos_distances = (upper + upper.T).astype(np.float16)
    else:
        cos_distances = np.zeros((0, 0), dtype=np.float16)

    print('Calculating IDF weights')
    idfs = []
    with tqdm(range(num_mp3_vecs), unit="vector") as t:
        for i in t:
            # Number of tracks in the batch with at least one slice vector
            # within epsilon_distance of vector i (its own track always counts).
            idf = sum(
                1 for mp3 in mp3s
                if any(cos_distances[i, j] < epsilon_distance for j in mp3_indices[mp3])
            )
            idfs.append(-np.log(idf / len(mp3s)))

    print('Calculating TF weights')
    with tqdm(mp3s, unit="mp3") as t:
        for mp3 in t:
            vec = 0
            for i in mp3_indices[mp3]:
                # Number of slices of the same track that match slice i.
                tf = sum(1 for j in mp3_indices[mp3] if cos_distances[i, j] < epsilon_distance)
                vec += mp3_vecs[i] * tf * idfs[i]
            # Store the result once per track, after all slices are summed.
            mp3tovecs[mp3] = vec
            # NOTE(review): this raises KeyError if the track id is not in the
            # Word2Vec vocabulary -- confirm the CSV only lists known ids.
            tracktovecs[mp3] = embedding[embedding_model.wv.vocab[mp3].index]

    # Save after every batch so an interrupted run can pick up where it left
    # off; `with` guarantees the files are flushed and closed.
    with open('../Pickles_Spotify/spotifytovec.p', 'wb') as pickle_file:
        pickle.dump(mp3tovecs, pickle_file)
    with open('../Pickles_Spotify/tracktovec.p', 'wb') as pickle_file:
        pickle.dump(tracktovecs, pickle_file)