In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf

import pickle
import time

from gensim.models import KeyedVectors

from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, losses
from tensorflow.keras.datasets import fashion_mnist
from tensorflow.keras.models import Model
import keras.backend as K

from evaluation_metrics import r_precision

2022-02-24 08:16:15.241688: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-02-24 08:16:15.241706: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [2]:
# Directory that holds all project data files
data_path = '/recsys/data/spotify/s2v_data/'
# Pickle file (inside data_path) with the train and test data and the track lookup maps
pickle_file = 'sentences_and_maps.pickle'
# File (inside data_path) with the Word2Vec song embeddings
vectors_file = 'song_vectors.kv'
# Cached encoder dataset (inside data_path); this notebook creates it if it doesn't exist
encoder_dataset = 'encoder_dataset.pickle'

In [3]:
# Read the pickled 4-tuple: train data, test data and the two track lookup maps.
# NOTE: pickle.load can execute arbitrary code; only load files from a trusted source.
with open(data_path + pickle_file, 'rb') as pickle_handle:
    loaded_objects = pickle.load(pickle_handle)
train, test, track_data_map, reverse_track_lookup = loaded_objects

In [4]:
def _tokenize(entry):
    """Split a whitespace-separated entry into a list of tokens.

    Entries that are already lists are returned unchanged, so this cell can be
    re-run without raising ``AttributeError`` on the already-tokenized data.
    """
    return entry if isinstance(entry, list) else entry.split()


# Turn every train/test entry into a list of song tokens.
train = [_tokenize(val) for val in train]
test = [_tokenize(val) for val in test]

# Load the pre-trained song embeddings (gensim KeyedVectors).
wv = KeyedVectors.load(data_path + vectors_file)

## Dataset creation
- The dataset is built as (song1_embedding, song2_embedding) pairs from songs found one after another, if both songs have embeddings
- We created 2 datasets, 1 of 500K examples and the other of 1M examples

In [5]:
# Load the cached train/validation sets, or build and cache them.
# Each example is a pair (embedding of a song, embedding of the song that follows it).
# About 1M pairs go to the training set and about 100K pairs to the validation set.
# NOTE: pickle.load can execute arbitrary code; only load cache files you trust.
TRAIN_PAIRS_TARGET = 1000000
VAL_PAIRS_TARGET = 100000

try:
    with open(data_path + encoder_dataset, 'rb') as handle:
        train_in, train_out, val_in, val_out = pickle.load(handle)

except Exception as e:
    # Any load failure (missing or unreadable cache) triggers a rebuild.
    print('Loading failed: {}'.format(e))
    print('Creating dataset')
    train_in, train_out, val_in, val_out = [], [], [], []

    add_to_train = True

    for pl in train:
        prev_song = None

        # The targets are checked once per playlist, so a playlist is never
        # split between train and validation and the final sizes may slightly
        # exceed the targets.
        if len(train_in) >= TRAIN_PAIRS_TARGET:
            add_to_train = False
        if len(val_in) >= VAL_PAIRS_TARGET:
            break

        for s in pl:
            # key_to_index is a dict, so membership is O(1); the previous
            # `s in wv.index_to_key` scanned a list for every song.
            if s in wv.key_to_index:
                if prev_song is not None:
                    if add_to_train:
                        train_in.append(wv.get_vector(prev_song))
                        train_out.append(wv.get_vector(s))
                    else:
                        val_in.append(wv.get_vector(prev_song))
                        val_out.append(wv.get_vector(s))

                prev_song = s
            else:
                # A song without an embedding breaks the chain of consecutive songs.
                prev_song = None

    data = train_in, train_out, val_in, val_out
    with open(data_path + encoder_dataset, 'wb') as handle:
        pickle.dump(data, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [6]:
# Convert the embedding collections into NumPy arrays.
train_in = np.array(train_in)
train_out = np.array(train_out)
val_in = np.array(val_in)
val_out = np.array(val_out)

In [7]:
# Add a trailing axis so each 64-dim embedding has shape (64, 1), matching the
# model's Input(shape=(64, 1)). Each target array is reshaped with the row
# count of its corresponding input array.
n_train_rows = train_in.shape[0]
n_val_rows = val_in.shape[0]
sample_shape = (64, 1)

train_in = np.reshape(train_in, (n_train_rows,) + sample_shape)
train_out = np.reshape(train_out, (n_train_rows,) + sample_shape)
val_in = np.reshape(val_in, (n_val_rows,) + sample_shape)
val_out = np.reshape(val_out, (n_val_rows,) + sample_shape)

## Model
- The model is an Encoder Decoder model
- The input is a song embedding
- The output is also a song embedding

- The objective is: given a song, predict the embedding of the next song

In [8]:
class ProdEmbToEmb(Model):
    """Encoder-decoder network that maps one song embedding to another.

    The encoder takes inputs of shape (64, 1), flattens them and compresses
    them to 32 units; the decoder expands that code back to a 64-unit linear
    output. Dropout is applied between the dense layers.
    """

    def __init__(self):
        super().__init__()

        encoder_layers = [
            layers.Input(shape=(64, 1)),
            layers.Flatten(),
            layers.Dense(32, activation='selu'),
            layers.Dropout(0.2),
            layers.Dense(32, activation='selu'),
        ]
        decoder_layers = [
            layers.Dense(32, activation='selu'),
            layers.Dropout(0.3),
            layers.Dense(64, activation='selu'),
            layers.Dropout(0.3),
            layers.Dense(64, activation='linear'),
        ]

        self.encoder = tf.keras.Sequential(encoder_layers)
        self.decoder = tf.keras.Sequential(decoder_layers)

    def call(self, x):
        """Run `x` through the encoder and then the decoder."""
        return self.decoder(self.encoder(x))


# Module-level model instance; the prediction helper reads this name.
autoencoder = ProdEmbToEmb()

2022-02-24 08:16:30.371843: E tensorflow/stream_executor/cuda/cuda_driver.cc:271] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2022-02-24 08:16:30.371884: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (kal-syl): /proc/driver/nvidia/version does not exist
2022-02-24 08:16:30.372244: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Prediction
Prediction is done as follows:
- Given a song, we encode it and decode it using the model
- Then we search for the embeddings nearest to the decoded vector
- We recommend the songs with the nearest embeddings

In [9]:
def predict_using_similarity_to_decoding(s_sample, k, wv, model=None):
    """Recommend songs by decoding seed-song embeddings and looking up neighbours.

    Every seed song that has an embedding is passed through the model's encoder
    and decoder; the decoded vector is then used to query `wv` for its nearest
    embeddings. The `k` recommendations are split evenly between the usable
    seed songs.

    Args:
        s_sample: iterable of seed song keys.
        k: total number of recommendations wanted.
        wv: embedding store providing `key_to_index`, `get_vector` and
            `similar_by_vector` (a gensim `KeyedVectors` in this notebook).
        model: object exposing `encoder` and `decoder` callables. Defaults to
            the notebook-level `autoencoder`, which keeps existing calls working.

    Returns:
        A list of at most `k` items as returned by `wv.similar_by_vector`
        (for gensim, `(key, similarity)` pairs), grouped by seed song in input
        order, or `None` when no seed song has an embedding.
    """
    if model is None:
        # Looked up at call time so the most recently trained model is used.
        model = autoencoder

    seeds = [s for s in s_sample if s in wv.key_to_index]
    if not seeds:
        return None

    # Ask for at least one neighbour per seed, even when k < number of seeds;
    # the final slice still caps the result at k.
    per_seed = max(1, k // len(seeds))

    all_results = []
    for s in seeds:
        # Shape (1, dim, 1): a batch holding a single embedding.
        vec = wv.get_vector(s).reshape(1, -1, 1)
        decoded_vec = model.decoder(model.encoder(vec)).numpy()
        all_results.extend(wv.similar_by_vector(decoded_vec[0], topn=per_seed))

    # Truncate to the requested k (previously a hard-coded 100).
    return all_results[:k]

## Hyperparameter tuning
The parameters we tune are:
- optimizer
- loss

In [10]:
# Grid search over optimizers and losses. For every combination a fresh model
# is trained and then scored with R-Precision on a fixed slice of the test data.
EPOCHS = 5
N_SEED_SONGS = 3   # leading songs of each test playlist used as seeds
TOP_K = 100        # number of recommendations requested per playlist
eval_playlists = test[50000:52000]

optimizers = ['adam', 'sgd', 'adamax']

# (loss class, label used in the results table)
_losses = [(losses.MeanSquaredError, 'MSE'), (losses.CosineSimilarity, 'Cosine_Similarity')]

# Rows are collected in a list and turned into a DataFrame once at the end:
# DataFrame.append is deprecated (removed in pandas 2.0) and copies the whole
# frame on every call.
results = []

for optimizer in optimizers:
    for loss_cls, loss_name in _losses:
        start = time.time()

        # predict_using_similarity_to_decoding reads the module-level
        # `autoencoder`, so the new model must be bound to that name.
        autoencoder = ProdEmbToEmb()
        autoencoder.compile(optimizer=optimizer, loss=loss_cls())

        autoencoder.fit(train_in, train_out,
                        epochs=EPOCHS,
                        shuffle=True,
                        validation_data=(val_in, val_out))

        similarity_prediction = [predict_using_similarity_to_decoding(sample[:N_SEED_SONGS], TOP_K, wv)
                                 for sample in eval_playlists]
        # Keep only the playlists for which a (non-empty) prediction was produced.
        test_found = [val for val, tester in zip(eval_playlists, similarity_prediction) if tester]
        similarity_prediction = [val for val in similarity_prediction if val]

        results.append({'time': (time.time() - start) / 60,  # minutes, training + evaluation
                        'optimizer': optimizer,
                        'loss': loss_name,
                        'R-Precision': r_precision(test_found, similarity_prediction)})

params_df = pd.DataFrame(results, columns=['time', 'optimizer', 'loss', 'R-Precision'])

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [11]:
# Display the results table: one row per (optimizer, loss) combination tried above.
params_df

Unnamed: 0,time,optimizer,loss,R-Precision
0,2.567158,adam,MSE,0.094779
1,2.744873,adam,Cosine_Similarity,0.064768
2,2.754101,sgd,MSE,0.096905
3,2.857194,sgd,Cosine_Similarity,0.061437
4,2.923372,adamax,MSE,0.09633
5,3.059028,adamax,Cosine_Similarity,0.069093
