In [None]:
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [45]:
import os
import math
import statistics
import pandas as pd
import numpy as np
import pickle
import keras
import keras.backend as K
from keras.utils import Sequence
from keras.layers import Conv1D, MaxPool1D, BatchNormalization, GlobalAvgPool1D, Dense, Dropout, Activation, Reshape, Input, Concatenate, dot, Add, Flatten, concatenate, Embedding, add
from keras.optimizers import SGD
from keras.callbacks import ModelCheckpoint, TensorBoard
from sklearn.metrics.pairwise import cosine_similarity
from sklearn import metrics
from sklearn.model_selection import train_test_split

In [46]:
%load_ext tensorboard

# **Original Model**
An implementation of the model as described in the *Deep Content-User Embedding Model for Music Recommendation* paper.

## **Creating the data**


*   Determine the number of users to use
*   Get all songs' ratings for those users
*   Consturct an interaction matrix
*   Create a dataframe of triplets: user id, postive item id, negative item id
*   create a N triplet for the same user (N = triplet_per_user)










In [None]:
def exsists_spectrogram(song_7did):
  return os.path.exists('/content/drive/My Drive/RS project/spectrograms/' + str(song_7did) + '.clip.npy')

def split_songs(song_7dids):
  train, test  = train_test_split(song_7dids, test_size=0.1, random_state=1)
  return train, test


def get_interactions(subset_df, subset_users, subset_songs, relevant_song_ids):
  interactions = []
  for user_id in subset_users:
    song_empty_onehot = np.zeros(len(subset_songs))
    users_songs = subset_df.loc[subset_df['user_id'] == user_id]
    for index, row in users_songs.iterrows():
      if row['song_id'] in relevant_song_ids:
        song_index = subset_songs.index(row['song_id'])
        song_empty_onehot[song_index] = row['listenings']
    interactions.append([song_empty_onehot])
  return interactions

def get_triplets(subset_users, triplets_per_user, interactions, dump_file_path):
  triplets_dict = {}
  triplets_counter = 0
  for user_index in range(len(subset_users)):
    for i in range(triplets_per_user):
      users_interactions = interactions[user_index]
      positive_users_indices = np.argwhere(np.array(users_interactions) > 0)
      negative_user_indices = np.argwhere(np.array(users_interactions) == 0)

      if positive_users_indices.shape[0] == 0 or negative_user_indices.shape[0] == 0:
        continue

      random_positive_index = positive_users_indices[np.random.randint(len(positive_users_indices))][1]
      random_negative_index = negative_user_indices[np.random.randint(len(negative_user_indices))][1]
      positive_song_id = subset_songs[random_positive_index]
      negative_song_id = subset_songs[random_negative_index]
      triplets_dict[triplets_counter] = [user_index, positive_song_id, negative_song_id]
      triplets_counter += 1

  triplets_df = pd.DataFrame.from_dict(triplets_dict, orient='index')
  triplets_df.columns = ['user_index', 'positive_song_id', 'negative_song_id']


  # clean df - get only rows with songs that have calculated spectrogram
  triplets_df = triplets_df[triplets_df['positive_song_id'].apply(exsists_spectrogram)]
  triplets_df = triplets_df[triplets_df['negative_song_id'].apply(exsists_spectrogram)]

  # dump
  pickle.dump(triplets_df, open(dump_file_path, 'wb'))

def create_data(num_users = 200):
  # read users history from file
  history = pd.read_csv('/content/drive/My Drive/RS project/10000.txt', delimiter='\t', header=None)
  history.columns = ['user_id', 'song_id', 'listenings']

  echonest_id_to_MSD_id = pickle.load(open('/content/drive/My Drive/RS project/echonest_id_to_MSD_id_unix.pkl', 'rb'))
  MSD_id_to_7D_id_unix = pickle.load(open('/content/drive/My Drive/RS project/MSD_id_to_7D_id_unix.pkl', 'rb'))
  history['song_id'] = history['song_id'].apply(lambda x: MSD_id_to_7D_id_unix[echonest_id_to_MSD_id[x]])

  # get subset of num_users users
  all_users = history['user_id'].unique()
  subset_users = all_users[:num_users]
  subset_df = history.loc[history['user_id'].isin(subset_users)]
  subset_songs = list(subset_df['song_id'].unique())

  return subset_df, subset_users, subset_songs

def split_data(subset_df, subset_users, subset_songs, triplets_per_user = 20):
  train, test = split_songs(subset_songs)


  # create interactions matrices
  train_interactions = get_interactions(subset_df, subset_users, subset_songs, train)
  test_interactions = get_interactions(subset_df, subset_users, subset_songs, test)

  # create triplets - triplets_per_user triplets per user
  get_triplets(subset_users, triplets_per_user, train_interactions, '/content/drive/My Drive/RS project/train_triplets_df.pkl')
  get_triplets(subset_users, triplets_per_user, testn_interactions, '/content/drive/My Drive/RS project/test_triplets_df.pkl')


In [None]:
def load_data():
  train =  pickle.load(open('/content/drive/My Drive/RS project/train_triplets_df.pkl', 'rb'))
  test =  pickle.load(open('/content/drive/My Drive/RS project/test_triplets_df.pkl', 'rb'))
  return train, test

In [None]:
subset_df, subset_users, subset_songs = create_data()
split_data(subset_df, subset_users, subset_songs)

In [None]:
train, test = load_data()

## **Get songs' spectrograms**


*   Spectrograms are created using mp3s_to_mel.py script from [this project](https://github.com/jongpillee/deep-content-user).
*   Each spectrogram is a numpy array of the shape (frequency_bins, timestamps)










In [None]:
def get_spectrogram(song_7did, length = 1000, mean = 0.2262, std = 0.2579): 
  file_path = '/content/drive/My Drive/RS project/spectrograms/' + song_7did + '.clip.npy'
  spectrogram = np.load(file_path).T

  # fit spectrogram to the given length
  # if spectrogram is too short, repeat the melody until length is reached
  while spectrogram.shape[0] < length:
    spectrogram = np.tile(spectrogram,(2,1))
  # cut the spectrogram to fit length exactly
  spectrogram = spectrogram[:length]

  # normalize spectrogram
  spectrogram -= mean
  spectrogram /= std

  spec_len = spectrogram.shape[0]
  start_spec = np.random.randint(spec_len-130)
  spectrogram = spectrogram[start_spec:]
  if spectrogram.shape[0] < spec_len:
      spectrogram = np.tile(spectrogram,(100,1))
      spectrogram = spectrogram[:spec_len]

  return spectrogram

## **Create data generator**

*   The output of the generator is of the form: x = the triplet [user_index, pos_spectrogram, neg_spectrogram], y = [1,0] (meaning that the first item should be classified as positive)
*   Each generator step outputs a batch (of size batch_size) of the described above

In [None]:
#  create  generator
# step output: user_index, pos_spectrogram, neg_spectrogram, [1,0]
class Generator(Sequence):

    def __init__(self, df, batch_size):
        self.df = df
        self.batch_size = batch_size

    def __len__(self):
        return math.ceil(len(self.df) / self.batch_size)

    def __getitem__(self, idx):
        batch_df = self.df[idx * self.batch_size:(idx + 1) *
        self.batch_size]

        user_index_batch = []
        pos_spectrograms_batch = []
        neg_spectrograms_batch = []
        y_batch = []

        for index, row in batch_df.iterrows():
          user_index_batch.append(row['user_index'])
          pos_spectrograms_batch.append(get_spectrogram(row['positive_song_id']))
          neg_spectrograms_batch.append(get_spectrogram(row['negative_song_id']))
          y_batch.append([1,0])

        user_index_batch = np.asarray(user_index_batch)
        pos_spectrograms_batch = np.asarray(pos_spectrograms_batch)
        neg_spectrograms_batch = np.asarray(neg_spectrograms_batch)
        y_batch = np.asarray(y_batch)

        return [user_index_batch, pos_spectrograms_batch, neg_spectrograms_batch], y_batch

## **Define the Model class**

The model is consturcted of three sub models:
*   User (anchor) model - which learns the users' embeddings
*   Positive item model - which learns the positive items' embeddings
*   Negative item model - which learns the negative items' embeddings

In [None]:
def hinge_loss(y_true,y_pred):
    # hinge loss
    y_pos = y_pred[:,:1]
    y_neg = y_pred[:,1:]
    loss = K.sum(K.maximum(0., 0.2 - y_pos + y_neg))
    return loss

In [None]:
class Model:
  def __init__(self, num_users, num_songs, weights_path, spectrogram_length = 1000, freq_bins = 128,dim_embedding = 300, lr = 0.2, lrdecay = 1e-6):
    self.num_users = num_users
    self.num_songs = num_songs
    self.dim_embedding = dim_embedding
    self.weights_path = weights_path

    self.callbacks = [ModelCheckpoint(
    weights_path, monitor='val_loss', verbose=0, save_best_only=True,
    save_weights_only=True), 
    TensorBoard(log_dir = '/content/drive/My Drive/RS project/logs_original_model', histogram_freq=1)]

    self.user_index_input = Input(shape=(1,))
    self.pos_item_input = Input(shape=(spectrogram_length, freq_bins))
    self.neg_item_input = Input(shape=(spectrogram_length, freq_bins))

    # user model - one hot
    user_dict = Embedding(num_users, 300, input_length=1)
    user_flat = Flatten()
    user_activ1 = Activation('relu')
    user_dense2 = Dense(300)
    user_activ2 = Activation('relu')
    user_sem = Dense(self.dim_embedding,activation='linear')
    

    # anchor user
    anchor_user_dense1 =  user_dict(self.user_index_input)
    anchor_user_flat = user_flat(anchor_user_dense1)
    anchor_user_activ1 = user_activ1(anchor_user_flat)
    anchor_user_dense2 = user_dense2(anchor_user_activ1)
    anchor_user_activ2 = user_activ2(anchor_user_dense2)
    self.anchor_user_sem = user_sem(anchor_user_activ2)

    # item model **audio**
    conv1 = Conv1D(128,4,padding='same',use_bias=True,kernel_initializer='he_uniform')
    activ1 = Activation('relu')
    MP1 = MaxPool1D(pool_size=4)
    conv2 = Conv1D(self.dim_embedding,4,padding='same',use_bias=True,kernel_initializer='he_uniform')
    activ2 = Activation('relu')
    MP2 = MaxPool1D(pool_size=4)
    conv3 = Conv1D(self.dim_embedding,4,padding='same',use_bias=True,kernel_initializer='he_uniform')
    activ3 = Activation('relu')
    MP3 = MaxPool1D(pool_size=4)
    conv4 = Conv1D(self.dim_embedding,2,padding='same',use_bias=True,kernel_initializer='he_uniform')
    activ4 = Activation('relu')
    MP4 = MaxPool1D(pool_size=2)
    conv5 = Conv1D(self.dim_embedding,1,padding='same',use_bias=True,kernel_initializer='he_uniform')
    activ5 = Activation('relu')

    avg_pool = GlobalAvgPool1D()
    item_sem = Dense(self.dim_embedding,activation='linear')

    # pos item
    pos_item_conv1 = conv1(self.pos_item_input)
    pos_item_activ1 = activ1(pos_item_conv1)
    pos_item_MP1 = MP1(pos_item_activ1)
    pos_item_conv2 = conv2(pos_item_MP1)
    pos_item_activ2 = activ2(pos_item_conv2)
    pos_item_MP2 = MP2(pos_item_activ2)
    pos_item_conv3 = conv3(pos_item_MP2)
    pos_item_activ3 = activ3(pos_item_conv3)
    pos_item_MP3 = MP3(pos_item_activ3)
    pos_item_conv4 = conv4(pos_item_MP3)
    pos_item_activ4 = activ4(pos_item_conv4)
    pos_item_MP4 = MP4(pos_item_activ4)
    pos_item_conv5 = conv5(pos_item_MP4)
    pos_item_activ5 = activ5(pos_item_conv5)
    pos_item_avg = avg_pool(pos_item_activ5)
    self.pos_item_sem = item_sem(pos_item_avg)

    # neg item
    neg_item_bn1 = conv1(self.neg_item_input)
    neg_item_activ1 = activ1(neg_item_bn1)
    neg_item_MP1 = MP1(neg_item_activ1)
    neg_item_bn2 = conv2(neg_item_MP1)
    neg_item_activ2 = activ2(neg_item_bn2)
    neg_item_MP2 = MP2(neg_item_activ2)
    neg_item_bn3 = conv3(neg_item_MP2)
    neg_item_activ3 = activ3(neg_item_bn3)
    neg_item_MP3 = MP3(neg_item_activ3)
    neg_item_bn4 = conv4(neg_item_MP3)
    neg_item_activ4 = activ4(neg_item_bn4)
    neg_item_MP4 = MP4(neg_item_activ4)
    neg_item_bn5 = conv5(neg_item_MP4)
    neg_item_activ5 = activ5(neg_item_bn5)
    neg_item_avg = avg_pool(neg_item_activ5)
    self.neg_item_sem = item_sem(neg_item_avg)

    # when using normalize=True, norm_a = a/|a|. Thus, for cosine similarity we can use dot product.
    v_p = dot([self.anchor_user_sem, self.pos_item_sem], axes = 1, normalize = True)
    v_ns = dot([self.anchor_user_sem, self.neg_item_sem], axes = 1, normalize = True)

    prob = concatenate([v_p] + [v_ns])
    output = Activation('linear', name='output')(prob)

    self.full_model = keras.models.Model(inputs = [self.user_index_input, self.pos_item_input, self.neg_item_input], outputs = output)
    sgd = SGD(lr=lr,decay=lrdecay,momentum=0.9,nesterov=True)
    self.full_model.compile(optimizer=sgd,loss=hinge_loss,metrics=['accuracy'])

    self.audio_model = keras.models.Model(inputs = self.pos_item_input, outputs = self.pos_item_sem)
    self.audio_model.compile(optimizer=sgd,loss=hinge_loss,metrics=['accuracy'])

    self.user_model = keras.models.Model(inputs = self.user_index_input, outputs = self.anchor_user_sem)
    self.user_model.compile(optimizer=sgd,loss=hinge_loss,metrics=['accuracy'])

  def fit(self, train_generator, epochs):
    self.full_model.fit(x = train_generator, epochs=epochs, callbacks = self.callbacks)

  def load(self):
    self.full_model.load_weights(self.weights_path)

  def encode(self, items, item_embedding_path, user_embedding_path):
    # encode items and save to files
    item_embedding = np.zeros((self.num_songs,self.dim_embedding))
    for index, song_7did in enumerate(items):
      if not os.path.exists('/content/drive/My Drive/RS project/spectrograms/' + song_7did + '.clip.npy'):
        continue
      spectrogram = get_spectrogram(song_7did)
      predicted = self.audio_model.predict(np.expand_dims(spectrogram, axis=0))
      item_embedding[index] = np.mean(predicted,axis=0)
    np.save(item_embedding_path ,item_embedding)

    # encode users
    user_embedding = self.user_model.predict(np.arange(self.num_users))
    np.save(user_embedding_path, user_embedding)


 ## **Training**

In [None]:
num_songs = len(subset_songs)

In [None]:
# init the model
model = Model(num_users = 200, num_songs = num_songs, weights_path = '/content/drive/My Drive/RS project/model_weights.h5')
model.load()

In [None]:
# init the generators
train_generator = Generator(train, batch_size=5)

model.fit(train_generator, epochs=100)

In [48]:
# %tensorboard --logdir '/content/drive/My Drive/RS project/logs_original_model'

 ## **Saving the embeddings**

In [None]:
item_embedding_path = '/content/drive/My Drive/RS project/item_embedding.npy'
user_embedding_path = '/content/drive/My Drive/RS project/user_embedding.npy'
model.encode(subset_songs, item_embedding_path, user_embedding_path)

 ## **Evaluation**
 

*   Getting the original interaction matrix
*   Getting the predicted interaction matrix by applying cosine similarity between the users' and the items' embeddings
*   Computing avarage AUC of the original and the predicted interaction matrices




In [None]:
def get_original_interactions(subset_df, subset_users, subset_songs):
  interactions = np.array(get_interactions(subset_df, subset_users, subset_songs, subset_songs))
  # convert to binary
  interactions[interactions >= 1] = 1
  interactions = interactions.reshape(len(subset_users), len(subset_songs))
  return interactions

In [None]:
def get_predicted_interactions(item_embedding_path, user_embedding_path, num_users = 200):
  item_embedding = np.load(item_embedding_path)
  user_embedding = np.load(user_embedding_path)
  interactions = cosine_similarity(item_embedding, user_embedding)
  interactions = np.array(interactions)
  interactions = interactions.T
  return interactions

In [None]:
def get_auc_score(original_interactions, predicted_interactions, num_users = 200):
  users_auc = []
  for user_index in range(num_users):
    try:
      user_auc = metrics.roc_auc_score(original_interactions[user_index], predicted_interactions[user_index], multi_class='ovo')
      users_auc.append(user_auc)
    except ValueError:
      pass
  return statistics.mean(users_auc)

In [None]:
def get_pop_auc_score(original_interactions, pop, num_users = 200):
  users_auc = []
  for user_index in range(num_users):
    try:
      user_auc = metrics.roc_auc_score(original_interactions[user_index], pop, multi_class='ovo')
      users_auc.append(user_auc)
    except ValueError:
      pass
  return statistics.mean(users_auc)

In [None]:
train_songs = list(set(list(train.positive_song_id.unique()) + list(train.negative_song_id.unique())))
train_indices = [subset_songs.index(song_id) for song_id in train_songs]


test_songs = list(set(list(test.positive_song_id.unique()) + list(test.negative_song_id.unique())))
test_indices = [subset_songs.index(song_id) for song_id in test_songs]

In [None]:
item_embedding_path = '/content/drive/My Drive/RS project/item_embedding.npy'
user_embedding_path = '/content/drive/My Drive/RS project/user_embedding.npy'
original_interactions = get_original_interactions(subset_df, subset_users, subset_songs)
predicted_interactions = get_predicted_interactions(item_embedding_path, user_embedding_path)

In [None]:
auc = get_auc_score(original_interactions[:,test_indices], predicted_interactions[:,test_indices])

In [None]:
auc

0.7147152156031262

 ### **Comparing to popularity based recommendation**

In [None]:
# Popular items
pop_items = np.sum(original_interactions[:,test_indices],axis=0)
auc = get_pop_auc_score(original_interactions[:,test_indices], pop_items, num_users = 200)

In [None]:
auc

0.6472221541205211

# **Modified Model**
A modified model, which combines songs' metadata as items' content, in addition to the songs' spectrograms.

## **Creating additional data**
Creating a dataframe whcih contains songs' metadata










In [None]:
def create_metadata(songs, dump_file_path = '/content/drive/My Drive/RS project/metadata_df.pkl'):

  MSD_id_to_7D_id_unix = pickle.load(open('/content/drive/My Drive/RS project/MSD_id_to_7D_id_unix.pkl', 'rb'))
  metadata_df = pd.read_csv('/content/drive/My Drive/RS project/echonest_data.csv')
  metadata_df['song_7did'] = metadata_df['track_id'].apply(lambda x: MSD_id_to_7D_id_unix[x])

  metadata_dict = {}
  metadata_counter = 0

  for song_7did in songs:
    row = metadata_df[metadata_df['song_7did'] == song_7did]
    metadata_dict[metadata_counter] = {'song_7did' : song_7did}
    if row.shape[0] > 0:
      metadata_dict[metadata_counter]['artist_name'] = row.iloc[0]['artist_name']
      metadata_dict[metadata_counter]['tempo'] = row.iloc[0]['tempo']
      metadata_dict[metadata_counter]['loudness'] = row.iloc[0]['loudness']
      metadata_dict[metadata_counter]['mode'] = row.iloc[0]['mode']

    metadata_counter += 1
  
  metadata_df = pd.DataFrame.from_dict(metadata_dict, orient='index')

  # normalize tempo and loudness between 0 and 1
  metadata_df['tempo_norm'] = (metadata_df['tempo']-metadata_df['tempo'].min())/(metadata_df['tempo'].max()-metadata_df['tempo'].min())
  metadata_df['loudness_norm'] = (metadata_df['loudness']-metadata_df['loudness'].min())/(metadata_df['loudness'].max()-metadata_df['loudness'].min())
  metadata_df = metadata_df.drop(['tempo', 'loudness'], axis=1)
  metadata_df = metadata_df.rename(columns={"tempo_norm": "tempo", "loudness_norm": "loudness"})

  metadata_df = metadata_df.dropna()

  pickle.dump(metadata_df, open(dump_file_path, 'wb'))

In [None]:
def load_metadata(dump_file_path = '/content/drive/My Drive/RS project/metadata_df.pkl'):
  return pickle.load(open(dump_file_path, 'rb'))

In [None]:
# create_metadata(subset_songs)
metadata_df = load_metadata()

In [None]:
metadata_df.head()

Unnamed: 0,song_7did,artist_name,mode,tempo,loudness
0,2093263,Jack Johnson,1.0,0.476799,0.566081
1,2631392,Paco De Lucia,0.0,0.741553,0.828729
2,1356697,Kanye West,1.0,0.532296,0.793565
3,2424087,Jack Johnson,1.0,0.468307,0.666338
4,3360473,Foo Fighters,0.0,0.522993,0.919507


## **Modify data generator**

*   The x part of the genrator's outpus should contain songs' metadata

In [None]:
#  create  generator
# step output: user_index, pos_spectrogram, neg_spectrogram, [1,0]
class ModifiedGenerator(Sequence):

    def __init__(self, df, metadata_df, batch_size):
        self.df = df
        self.metadata_df = metadata_df
        self.artists = list(metadata_df.artist_name.unique())
        self.batch_size = batch_size
        self.metadata_length = len(self.artists) + 3

    def __len__(self):
        return math.ceil(len(self.df) / self.batch_size)

    def __getitem__(self, idx):
        batch_df = self.df[idx * self.batch_size:(idx + 1) *
        self.batch_size]

        user_index_batch = []
        pos_spectrograms_batch = []
        pos_metdata_batch = []
        neg_spectrograms_batch = []
        neg_metdata_batch = []
        y_batch = []

        for index, row in batch_df.iterrows():
          # get anchor user
          user_index_batch.append(row['user_index'])

          # get positive item
          pos_song_7did = row['positive_song_id']
          # get spectrogram
          pos_spectrograms_batch.append(get_spectrogram(pos_song_7did))
          # get metadata
          # if no metadata is available
          if self.metadata_df[self.metadata_df['song_7did'] == pos_song_7did].shape[0] == 0:
            pos_metdata_batch.append(np.zeros(self.metadata_length))
          else:
            pos_metdata = self.get_metadata(pos_song_7did)
            pos_metdata_batch.append(pos_metdata)
          
          # get negative item
          neg_song_7did = row['negative_song_id']
          # get spectrogram
          neg_spectrograms_batch.append(get_spectrogram(neg_song_7did))
          # get metadata
          if self.metadata_df[self.metadata_df['song_7did'] == neg_song_7did].shape[0] == 0:
            neg_metdata_batch.append(np.zeros(self.metadata_length))
          else:
            neg_metdata = self.get_metadata(neg_song_7did)
            neg_metdata_batch.append(neg_metdata)

          # append y
          y_batch.append([1,0])


        return  ({'user_index_input': np.array(user_index_batch), 'pos_item_input': np.array(pos_spectrograms_batch),
                  'pos_item_metadata_input': np.array(pos_metdata_batch), 'neg_item_input': np.array(neg_spectrograms_batch),
                  'neg_item_metadata_input': np.array(neg_metdata_batch)
                  }, {'output': np.array(y_batch)})

    def to_categorical(self, artist_name):
      num_artists = len(self.artists)
      one_hot = np.zeros(num_artists)
      artists_index = self.artists.index(artist_name)
      one_hot[artists_index] = 1
      return list(one_hot)

    def get_metadata(self, song_7did):
      metadata_row = self.metadata_df[self.metadata_df['song_7did'] == song_7did]
      metadata_row = metadata_row.iloc[0]
      artist_name = metadata_row['artist_name']
      artist_one_hot = self.to_categorical(artist_name)
      tempo = [metadata_row['tempo']]
      loudness = [metadata_row['loudness']]
      mode = [metadata_row['mode']]

      # concat into one vector
      return np.array(artist_one_hot  + tempo + loudness + mode)


## **Modify the model**

Expand the positive and negative models to include a sub-model which deals with metadata

In [None]:
class ModifiedModel:
  def __init__(self, num_users, num_songs, weights_path, metadata_size, generator, spectrogram_length = 1000, freq_bins = 128,dim_embedding = 300, lr = 0.2, lrdecay = 1e-6):
    self.num_users = num_users
    self.num_songs = num_songs
    self.dim_embedding = dim_embedding
    self.metadata_size = metadata_size
    self.weights_path = weights_path
    self.generator = generator

    self.callbacks = [ModelCheckpoint(
    weights_path, monitor='val_loss', verbose=0, save_best_only=True,
    save_weights_only=True), 
    TensorBoard(log_dir = '/content/drive/My Drive/RS project/logs_midified_model', histogram_freq=1)]

    self.user_index_input = Input(shape=(1,), name='user_index_input')
    self.pos_item_input = Input(shape=(spectrogram_length, freq_bins), name='pos_item_input')
    self.pos_item_metadata_input = Input(shape=(metadata_size,), name='pos_item_metadata_input')
    self.neg_item_input = Input(shape=(spectrogram_length, freq_bins), name='neg_item_input')
    self.neg_item_metadata_input = Input(shape=(metadata_size,), name='neg_item_metadata_input')


    # user model - one hot
    user_dict = Embedding(num_users, 300, input_length=1)
    user_flat = Flatten()
    user_activ1 = Activation('relu')
    user_dense2 = Dense(300)
    user_activ2 = Activation('relu')
    user_sem = Dense(self.dim_embedding,activation='linear')

    # anchor user
    anchor_user_dense1 =  user_dict(self.user_index_input)
    anchor_user_flat = user_flat(anchor_user_dense1)
    anchor_user_activ1 = user_activ1(anchor_user_flat)
    anchor_user_dense2 = user_dense2(anchor_user_activ1)
    anchor_user_activ2 = user_activ2(anchor_user_dense2)
    self.anchor_user_sem = user_sem(anchor_user_activ2)

    # item model **audio**
    conv1 = Conv1D(128,4,padding='same',use_bias=True,kernel_initializer='he_uniform')
    activ1 = Activation('relu')
    MP1 = MaxPool1D(pool_size=4)
    conv2 = Conv1D(self.dim_embedding,4,padding='same',use_bias=True,kernel_initializer='he_uniform')
    activ2 = Activation('relu')
    MP2 = MaxPool1D(pool_size=4)
    conv3 = Conv1D(self.dim_embedding,4,padding='same',use_bias=True,kernel_initializer='he_uniform')
    activ3 = Activation('relu')
    MP3 = MaxPool1D(pool_size=4)
    conv4 = Conv1D(self.dim_embedding,2,padding='same',use_bias=True,kernel_initializer='he_uniform')
    activ4 = Activation('relu')
    MP4 = MaxPool1D(pool_size=2)
    conv5 = Conv1D(self.dim_embedding,1,padding='same',use_bias=True,kernel_initializer='he_uniform')
    activ5 = Activation('relu')

    avg_pool = GlobalAvgPool1D()
    item_sem = Dense(self.dim_embedding,activation='linear')

    # pos item
    pos_item_conv1 = conv1(self.pos_item_input)
    pos_item_activ1 = activ1(pos_item_conv1)
    pos_item_MP1 = MP1(pos_item_activ1)
    pos_item_conv2 = conv2(pos_item_MP1)
    pos_item_activ2 = activ2(pos_item_conv2)
    pos_item_MP2 = MP2(pos_item_activ2)
    pos_item_conv3 = conv3(pos_item_MP2)
    pos_item_activ3 = activ3(pos_item_conv3)
    pos_item_MP3 = MP3(pos_item_activ3)
    pos_item_conv4 = conv4(pos_item_MP3)
    pos_item_activ4 = activ4(pos_item_conv4)
    pos_item_MP4 = MP4(pos_item_activ4)
    pos_item_conv5 = conv5(pos_item_MP4)
    pos_item_activ5 = activ5(pos_item_conv5)
    pos_item_avg = avg_pool(pos_item_activ5)
    pos_spec_model_output = item_sem(pos_item_avg)

    # neg item
    neg_item_bn1 = conv1(self.neg_item_input)
    neg_item_activ1 = activ1(neg_item_bn1)
    neg_item_MP1 = MP1(neg_item_activ1)
    neg_item_bn2 = conv2(neg_item_MP1)
    neg_item_activ2 = activ2(neg_item_bn2)
    neg_item_MP2 = MP2(neg_item_activ2)
    neg_item_bn3 = conv3(neg_item_MP2)
    neg_item_activ3 = activ3(neg_item_bn3)
    neg_item_MP3 = MP3(neg_item_activ3)
    neg_item_bn4 = conv4(neg_item_MP3)
    neg_item_activ4 = activ4(neg_item_bn4)
    neg_item_MP4 = MP4(neg_item_activ4)
    neg_item_bn5 = conv5(neg_item_MP4)
    neg_item_activ5 = activ5(neg_item_bn5)
    neg_item_avg = avg_pool(neg_item_activ5)
    neg_spec_model_output = item_sem(neg_item_avg)

    # item model **metadata**
    metadata_dense_1 = Dense(300, activation='relu')
    metadata_batch_norm_1 = BatchNormalization()
    metadata_batch_tanh_1 = Activation('tanh')
    metadata_batch_dropout_1 = Dropout(0.5)

    metadata_dense_2 = Dense(200, activation='relu')
    metadata_batch_norm_2 = BatchNormalization()
    metadata_batch_tanh_2 = Activation('tanh')
    metadata_batch_dropout_2 = Dropout(0.5)

    metadata_dense_3 = Dense(150, activation='relu')
    metadata_batch_norm_3 = BatchNormalization()
    metadata_batch_tanh_3 = Activation('tanh')
    metadata_batch_dropout_3 = Dropout(0.5)

    metadata_dense_4 = Dense(50, activation='relu')
    metadata_batch_norm_4 = BatchNormalization()
    metadata_batch_softmex = Activation('softmax')



    # pos item
    pos_metadata_output_1 = metadata_dense_1(self.pos_item_metadata_input)
    pos_metadata_output_2 = metadata_batch_norm_1(pos_metadata_output_1)
    pos_metadata_output_3 = metadata_batch_tanh_1(pos_metadata_output_2)
    pos_metadata_output_4 = metadata_batch_dropout_1(pos_metadata_output_3)

    pos_metadata_output_5 = metadata_dense_2(pos_metadata_output_4)
    pos_metadata_output_6 = metadata_batch_norm_2(pos_metadata_output_5)
    pos_metadata_output_7 = metadata_batch_tanh_2(pos_metadata_output_6)
    pos_metadata_output_8 = metadata_batch_dropout_2(pos_metadata_output_7)

    pos_metadata_output_9 = metadata_dense_3(pos_metadata_output_8)
    pos_metadata_output_10 = metadata_batch_norm_3(pos_metadata_output_9)
    pos_metadata_output_11 = metadata_batch_tanh_3(pos_metadata_output_10)
    pos_metadata_output_12 = metadata_batch_dropout_3(pos_metadata_output_11)

    pos_metadata_output_13 = metadata_dense_4(pos_metadata_output_12)
    pos_metadata_output_14 = metadata_batch_norm_4(pos_metadata_output_13)
    pos_metadata_output = metadata_batch_softmex(pos_metadata_output_14)
 
    # neg item
    neg_metadata_output_1 = metadata_dense_1(self.neg_item_metadata_input)
    neg_metadata_output_2 = metadata_batch_norm_1(neg_metadata_output_1)
    neg_metadata_output_3 = metadata_batch_tanh_1(neg_metadata_output_2)
    neg_metadata_output_4 = metadata_batch_dropout_1(neg_metadata_output_3)

    neg_metadata_output_5 = metadata_dense_2(neg_metadata_output_4)
    neg_metadata_output_6 = metadata_batch_norm_2(neg_metadata_output_5)
    neg_metadata_output_7 = metadata_batch_tanh_2(neg_metadata_output_6)
    neg_metadata_output_8 = metadata_batch_dropout_2(neg_metadata_output_7)

    neg_metadata_output_9 = metadata_dense_3(neg_metadata_output_8)
    neg_metadata_output_10 = metadata_batch_norm_3(neg_metadata_output_9)
    neg_metadata_output_11 = metadata_batch_tanh_3(neg_metadata_output_10)
    neg_metadata_output_12 = metadata_batch_dropout_3(neg_metadata_output_11)

    neg_metadata_output_13 = metadata_dense_4(neg_metadata_output_12)
    neg_metadata_output_14 = metadata_batch_norm_4(neg_metadata_output_13)
    neg_metadata_output = metadata_batch_softmex(neg_metadata_output_14)

    #combine spectrogram model output and metadata model output
    concat_spec_metadata = Concatenate()
    dense_concat = Dense(self.dim_embedding)

    pos_concat = concat_spec_metadata([pos_spec_model_output, pos_metadata_output])
    self.pos_combined = dense_concat(pos_concat)
    neg_concat = concat_spec_metadata([neg_spec_model_output, neg_metadata_output])
    self.neg_combined = dense_concat(neg_concat)

    # compute cosine similarity 
    # when using normalize=True, norm_a = a/|a|. Thus, for cosine similarity we can use dot product.
    v_p = dot([self.anchor_user_sem, self.pos_combined], axes = 1, normalize = True)
    v_ns = dot([self.anchor_user_sem, self.neg_combined], axes = 1, normalize = True)

    prob = concatenate([v_p] + [v_ns])
    output = Activation('linear', name='output')(prob)

    self.full_model = keras.models.Model(inputs = [self.user_index_input,
                                                   self.pos_item_input, self.pos_item_metadata_input,
                                                   self.neg_item_input, self.neg_item_metadata_input], outputs = output)
    
    
    sgd = SGD(lr=lr,decay=lrdecay,momentum=0.9,nesterov=True)
    self.full_model.compile(optimizer=sgd,loss=hinge_loss,metrics=['accuracy'])

    self.audio_model = keras.models.Model(inputs = [self.pos_item_input, self.pos_item_metadata_input], outputs = self.pos_combined)
    self.audio_model.compile(optimizer=sgd,loss=hinge_loss,metrics=['accuracy'])

    self.user_model = keras.models.Model(inputs = self.user_index_input, outputs = self.anchor_user_sem)
    self.user_model.compile(optimizer=sgd,loss=hinge_loss,metrics=['accuracy'])

  def fit(self, train_generator, epochs):
    self.full_model.fit(x = train_generator, epochs=epochs, callbacks = self.callbacks)
  
  def load(self):
    self.full_model.load_weights(self.weights_path)

  def encode(self, items, item_embedding_path, user_embedding_path):
    # encode items and save to files
    item_embedding = np.zeros((self.num_songs,self.dim_embedding))
    for index, song_7did in enumerate(items):
      if not os.path.exists('/content/drive/My Drive/RS project/spectrograms/' + song_7did + '.clip.npy'):
        continue
      if self.generator.metadata_df[self.generator.metadata_df['song_7did'] == song_7did].shape[0] == 0:
        continue
      spectrogram = get_spectrogram(song_7did)
      metadata = self.generator.get_metadata(song_7did)
      input = {'pos_item_input': np.array([spectrogram]),
       'pos_item_metadata_input': np.array([metadata])
      }
      predicted = self.audio_model.predict(input)
      item_embedding[index] = np.mean(predicted,axis=0)
    np.save(item_embedding_path ,item_embedding)

    # encode users
    user_embedding = self.user_model.predict(np.arange(self.num_users))
    np.save(user_embedding_path, user_embedding)



 ## **Training**

In [None]:
# define num_songs 
# users = triplets_df['user_index'].unique()
# songs = set(list(triplets_df['positive_song_7did'].unique()) + list(triplets_df['negative_song_7did'].unique()))
num_songs = len(subset_songs)
artists = list(metadata_df.artist_name.unique())
metadata_size = len(artists) + 3

# init the generator 
train_generator = ModifiedGenerator(train, metadata_df, batch_size=5)


# init the model
model = ModifiedModel(num_users = 200, num_songs = num_songs, weights_path = '/content/drive/My Drive/RS project/modified_model_weights.h5', metadata_size = metadata_size, generator = train_generator)
model.load()

In [None]:
# train
model.fit(train_generator, epochs=100)

In [49]:
# %tensorboard --logdir '/content/drive/My Drive/RS project/logs_midified_model'

 ## **Saving the embeddings**

In [None]:
item_embedding_path = '/content/drive/My Drive/RS project/modified_item_embedding.npy'
user_embedding_path = '/content/drive/My Drive/RS project/modified_user_embedding.npy'
model.encode(subset_songs, item_embedding_path, user_embedding_path)

 ## **Evaluation**
 

*   Getting the original interaction matrix
*   Getting the predicted interaction matrix by applying cosine similarity between the users' and the items' embeddings
*   Computing avarage AUC of the original and the predicted interaction matrices




In [None]:
item_embedding_path = '/content/drive/My Drive/RS project/modified_item_embedding.npy'
user_embedding_path = '/content/drive/My Drive/RS project/modified_user_embedding.npy'
original_interactions = get_original_interactions(subset_df, subset_users, subset_songs)
predicted_interactions = get_predicted_interactions(item_embedding_path, user_embedding_path)
auc = get_auc_score(original_interactions[:,train_indices], predicted_interactions[:,train_indices])


In [None]:
auc

0.7285648387544025