In [48]:
import tensorflow as tf
import tensorflow_recommenders as tfrs
import pandas as pd
from pprint import pprint
import numpy as np
import plotly.express as px
from typing import Dict, List

# Data preparation

In [147]:
# database is small enough to be read into memory hence we will use pandas and then cast it into a tf.data object
data = pd.read_csv('./data/full_data.csv',index_col=0)
data = data.dropna()
pprint(data.dtypes)
data.head()

artist               object
track_id             object
track                object
owner                object
artist&track         object
danceability        float64
energy              float64
key                   int64
loudness            float64
mode                  int64
speechiness         float64
acousticness        float64
instrumentalness    float64
liveness            float64
valence             float64
tempo               float64
dtype: object


Unnamed: 0,artist,track_id,track,owner,artist&track,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,Romanthony,1iwZVgKv3FKc0dqhnSG9uW,Get It 2 Getta,kv718oiku8q612q0zi4iaovzb,Romanthony/Get It 2 Getta,0.828,0.338,5,-17.362,0,0.151,0.038,0.804,0.0422,0.805,120.525
1,Glenn Underground,2JFeAyJLMPPsBkklQEet6t,H-Dance,kv718oiku8q612q0zi4iaovzb,Glenn Underground/H-Dance,0.696,0.524,0,-16.11,1,0.0489,0.000879,0.252,0.104,0.672,122.015
2,DJ BORING,3ilkEyg6OCtd9qCnOJkPzU,Winona,kv718oiku8q612q0zi4iaovzb,DJ BORING/Winona,0.656,0.693,5,-7.876,0,0.0703,0.0387,0.83,0.111,0.158,122.028
3,DJ BORING,3ilkEyg6OCtd9qCnOJkPzU,Winona,botetauw3lk6anvygizm8wolf,DJ BORING/Winona,0.656,0.693,5,-7.876,0,0.0703,0.0387,0.83,0.111,0.158,122.028
4,DJ BORING,3ilkEyg6OCtd9qCnOJkPzU,Winona,thebootlegboy,DJ BORING/Winona,0.656,0.693,5,-7.876,0,0.0703,0.0387,0.83,0.111,0.158,122.028


### Train/val/test split

In [3]:
from sklearn.model_selection import train_test_split

In [148]:
float_cols = [col for col in data.columns if data[col].dtype == 'float64']
all_cols = ['owner', 'artist&track'] + float_cols

In [149]:
df = data[all_cols]
train, test = train_test_split(df, test_size=0.15, random_state=21)
train, val = train_test_split(train, test_size=0.15, random_state=21)

For EDA of the datasets inspect ``dense.ipynb``

### Feature selection

In [150]:
tf_conv = lambda x: tf.data.Dataset.from_tensor_slices(dict(x))
train_tf = tf_conv(train)
val_tf = tf_conv(val)
test_tf = tf_conv(test)

In [152]:
tracks_df = df.copy()
tracks_df = tracks_df.drop('owner', axis=1)
tracks_df = tracks_df.drop_duplicates()
tracks_df.shape

(111490, 10)

In [153]:
unique_track_names = df['artist&track'].unique()
unique_user_ids = df['owner'].unique()

In [156]:
tracks = tf.data.Dataset.from_tensor_slices(dict(tracks_df))
next(iter(tracks))

2023-01-05 20:27:20.668795: W tensorflow/core/data/root_dataset.cc:266] Optimization loop failed: CANCELLED: Operation was cancelled


{'artist&track': <tf.Tensor: shape=(), dtype=string, numpy=b'Romanthony/Get It 2 Getta'>,
 'danceability': <tf.Tensor: shape=(), dtype=float64, numpy=0.828>,
 'energy': <tf.Tensor: shape=(), dtype=float64, numpy=0.338>,
 'loudness': <tf.Tensor: shape=(), dtype=float64, numpy=-17.362>,
 'speechiness': <tf.Tensor: shape=(), dtype=float64, numpy=0.151>,
 'acousticness': <tf.Tensor: shape=(), dtype=float64, numpy=0.038>,
 'instrumentalness': <tf.Tensor: shape=(), dtype=float64, numpy=0.804>,
 'liveness': <tf.Tensor: shape=(), dtype=float64, numpy=0.0422>,
 'valence': <tf.Tensor: shape=(), dtype=float64, numpy=0.805>,
 'tempo': <tf.Tensor: shape=(), dtype=float64, numpy=120.525>}

In [154]:
tracks1 = tf.data.Dataset.from_tensor_slices({'artist&track': unique_track_names})
# tracks1 = tf.data.Dataset.from_tensors(unique_track_names)

In [155]:
next(iter(tracks1))

2023-01-05 20:26:48.653012: W tensorflow/core/data/root_dataset.cc:266] Optimization loop failed: CANCELLED: Operation was cancelled


{'artist&track': <tf.Tensor: shape=(), dtype=string, numpy=b'Romanthony/Get It 2 Getta'>}

# Two tower model

### Query (user) model

A simple query model that involves a `tf.keras.layers.Embedding` layer and a stack of dense layers with a ReLu activation.

In [167]:
class UserModel(tf.keras.Model):

    def __init__(self, embedding_dimension, deep_layer_sizes):
        super().__init__()
        self._embedding_dimension = embedding_dimension

        self.user_embedding: tf.keras.layers.Layer = tf.keras.Sequential([
            tf.keras.layers.StringLookup(
                vocabulary=unique_user_ids, mask_token=None, name='user_string'),
            tf.keras.layers.Embedding(len(unique_user_ids) + 1, self._embedding_dimension),
        ])

        self._deep_layers = [tf.keras.layers.Dense(layer_size, activation='relu') for layer_size in deep_layer_sizes]

        # final dense layer must have size of the embedding dimension as it's imperative for the matrix factorisation with the
        # candidate matrix which has size (num_candidates, embedding_dimension)
        self._dense_final_layer = tf.keras.layers.Dense(self._embedding_dimension, activation='relu')

    def call(self, input_users):

        x = self.user_embedding(input_users)

        for deep_layer in self._deep_layers:
            x = deep_layer(x)

        return self._dense_final_layer(x)

### Candidate Model

The main feature for the candidate model is the `artist&track` field which seres as a unique identifier of a song. In the candidate model a `tf.keras.layers.Embedding` layer is used to convert the string into a vector of length `embedding_dimension`.

Additional audio features are leveraged to improve accuracy of the retrieval task. This is done by concatenating normalised audio features with the embedded vector and then passing the resultant tensor into a stack of dense layers.

In [157]:
class CandidateModel(tf.keras.Model):

    def __init__(self, embedding_dimension=32, deep_layer_sizes=[256]):
        super().__init__()
        self._embedding_dimension = embedding_dimension
        # audio cols that will be used in the concatenation with the embedded artist&song field
        self._audio_cols = ['danceability', 'energy', 'loudness', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo']


        self.track_embedding: tf.keras.layers.Layer = tf.keras.Sequential([
            tf.keras.layers.StringLookup(
                vocabulary=unique_track_names, mask_token=None, name='track_lookup'),
            tf.keras.layers.Embedding(len(unique_track_names) + 1, self._embedding_dimension),
        ])

        self._audio_norm: tf.keras.layers.Layer = tf.keras.layers.BatchNormalization()

        self._deep_layers = [tf.keras.layers.Dense(layer_size, activation='relu') for layer_size in deep_layer_sizes]

        # final dense layer must have size of the embedding dimension as it's imperative for the matrix factorisation with the
        # query matrix which has size (num_query, embedding_dimension)
        self._dense_final_layer = tf.keras.layers.Dense(self._embedding_dimension, activation='relu')

    def call(self, inputs):

        tracks_embeddings = self.track_embedding(inputs["artist&track"])

        audio = tf.concat([tf.expand_dims(inputs[col],-1) for col in self._audio_cols], axis=1)
        audio = self._audio_norm(audio)

        x = tf.concat([tracks_embeddings, audio], axis=1)
        # x = tracks_embeddings

        for deep_layer in self._deep_layers:
            x = deep_layer(x)

        return self._dense_final_layer(x)


### Full retrieval model

The query and candidate towers are combined here in the full retrieval model.

In [163]:
class RetrievalModel(tfrs.Model):

    def __init__(self, embedding_dimension=32, query_dense_list=[256], candidate_dense_list=[256]):
        super().__init__()
        self._embedding_dimension: int = embedding_dimension
        self._query_dense_list: List[int] = query_dense_list
        self._candidate_dense_list: List[int] = candidate_dense_list

        self.query_model: tf.keras.Model = UserModel(self._embedding_dimension, self._query_dense_list)

        self.candidate_model: tf.keras.Model = CandidateModel(self._embedding_dimension, self._candidate_dense_list)

        self.task: tf.keras.layers.Layer = tfrs.tasks.Retrieval(
            metrics=tfrs.metrics.FactorizedTopK(
                candidates=tracks.batch(128).map(self.candidate_model),
                ks=(50,100),
            )
        )

    def compute_loss(self, features, training=False) -> tf.Tensor:
        # We pick out the user features and pass them into the user model.
        user_embeddings = self.query_model(features["owner"])
        # And pick out the track features and pass them into the track model
        positive_track_embeddings = self.candidate_model(features)
        # The task computes the loss and the metrics.
        return self.task(user_embeddings, positive_track_embeddings, compute_metrics=not training)

In [164]:
model = RetrievalModel()
model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1))
train_cached = train_tf.batch(2048).cache()
model.fit(train_cached, epochs=3)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7fa04ba82850>

In [165]:
val_cached = val_tf.batch(512).cache()
model.evaluate(val_cached)



[0.8931623697280884, 0.8933566212654114, 526.74951171875, 0, 526.74951171875]

In [166]:
model.evaluate(train_cached)



[0.8949984312057495, 0.8955298066139221, 14964.134765625, 0, 14964.134765625]

##### Very good results for the both the validation and training sets. A big improvement on the trivial embedding model.