# TFRS - Ranking model

In [1]:
# Optional dependency install, left commented out.
# !pip install tensorflow-recommenders google-cloud-aiplatform --user
# Print the version of the installed Vertex AI SDK from a separate python3 process.
! python3 -c "import google.cloud.aiplatform; print('aiplatform SDK version: {}'.format(google.cloud.aiplatform.__version__))"

aiplatform SDK version: 1.26.0


In [2]:
GCP_PROJECTS = !gcloud config get-value project
PROJECT_ID = GCP_PROJECTS[0]
PROJECT_NUM = !gcloud projects list --filter="$PROJECT_ID" --format="value(PROJECT_NUMBER)"
PROJECT_NUM = PROJECT_NUM[0]
LOCATION = 'us-central1'

print(f"PROJECT_ID: {PROJECT_ID}")
print(f"PROJECT_NUM: {PROJECT_NUM}")
print(f"LOCATION: {LOCATION}")

PROJECT_ID: hybrid-vertex
PROJECT_NUM: 934903580331
LOCATION: us-central1


In [3]:
# Cloud region and the bucket that stores output.
REGION = 'us-central1'
BUCKET_NAME = 'matching-engine-content'
BUCKET_URI = 'gs://' + BUCKET_NAME

# Version tag for the dataflow pipeline.
DATA_VERSION = "v2-0-0"

# Directory names used when building object prefixes.
BUCKET_DATA_DIR = 'spotify-data-regimes'
# NOTE(review): the train prefix has the same value as the validation
# prefix -- confirm this is intentional.
TRAIN_DIR_PREFIX = 'valid'
VALID_DIR_PREFIX = 'valid'
CANDIDATE_PREFIX = 'candidates'

In [4]:
from numba import cuda 
# Fetch the CUDA device numba is currently bound to and reset it.
# NOTE(review): presumably done to release GPU memory held by an earlier run
# before TensorFlow is imported -- confirm.
device = cuda.get_current_device()
device.reset()

In [5]:
import gc
# Force a garbage-collection pass; the cell output is the value returned by gc.collect().
gc.collect()

23

### imports 

In [6]:
import os

# Export the TensorFlow logging/GPU settings and the project ID through the
# process environment in one call (same keys, values and order as before).
os.environ.update({
    'TF_CPP_MIN_LOG_LEVEL': '2',
    'TF_GPU_THREAD_MODE': 'gpu_private',
    'TF_GPU_ALLOCATOR': 'cuda_malloc_async',
    "CLOUD_ML_PROJECT_ID": PROJECT_ID,
})

In [7]:
import json
import numpy as np
import pickle as pkl

import logging
import time
from pprint import pprint

# tensorflow
import tensorflow as tf
import tensorflow_recommenders as tfrs

# Turn on memory growth for every GPU TensorFlow can see.
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

from google.cloud import aiplatform as vertex_ai
from google.cloud import storage
# Cloud Storage client for the notebook's project; used later to list TFRecord objects.
storage_client = storage.Client(project=PROJECT_ID)

from src.ranking import tf_ranking_model as tfrm
# from src.two_tower_jt import train_utils
# from src.two_tower_jt import feature_sets

from util import feature_sets, train_utils, test_instances

In [8]:
# train_utils.full_parse
# Display the physical GPUs found by the import cell.
gpus

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'),
 PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')]

## Dataset

In [9]:
# Show the files that make up the local ranking-model package.
!tree src/ranking

[01;34msrc/ranking[00m
├── [01;34m__pycache__[00m
│   └── tf_ranking_model.cpython-310.pyc
└── tf_ranking_model.py

1 directory, 2 files


### create dataset objects

In [10]:
# tf.data options attached to the input pipelines: use the AUTO auto-shard policy.
options = tf.data.Options()
options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.AUTO

# Number of examples per batch used by the dataset pipelines.
batch_size = 1024 #*16

#### train dataset

In [11]:
# train_files = []
# for blob in storage_client.list_blobs(f"{BUCKET_NAME}", prefix=f'{DATA_VERSION}/{TRAIN_DIR_PREFIX}/'):
#     if '.tfrecords' in blob.name:
#         train_files.append(blob.public_url.replace("https://storage.googleapis.com/", "gs://"))
# train_files    
# train_dataset = tf.data.Dataset.from_tensor_slices(train_files).prefetch(
#     tf.data.AUTOTUNE,
# )

# train_dataset = train_dataset.interleave(
#     train_utils.full_parse,
#     cycle_length=tf.data.AUTOTUNE, 
#     num_parallel_calls=tf.data.AUTOTUNE,
#     deterministic=False,
# ).map(
#     feature_sets.parse_tfrecord, 
#     num_parallel_calls=tf.data.AUTOTUNE
# ).batch(
#     batch_size 
# ).prefetch(
#     tf.data.AUTOTUNE,
# ).with_options(
#     options
# )

# # train_dataset

In [12]:
# train_sample = []

# # check dataset output
# for x in train_dataset.batch(1).take(1):
#     # pprint(x)
#     train_sample.append(x)
    
# len(train_sample)

In [13]:
# train_sample

#### validation set

In [14]:
# Collect the gs:// URI of every TFRecord object under the validation prefix.
# The URI is assembled from the bucket and object names directly:
# `blob.public_url` percent-encodes the object name, so string-replacing its
# host with "gs://" produces a wrong path for names containing escaped characters.
valid_prefix = f'{DATA_VERSION}/{VALID_DIR_PREFIX}/'
valid_files = [
    f"gs://{BUCKET_NAME}/{blob.name}"
    for blob in storage_client.list_blobs(BUCKET_NAME, prefix=valid_prefix)
    if '.tfrecords' in blob.name
]

# An empty listing means the bucket or prefix is wrong; stop here with a
# readable message rather than building a pipeline over zero files.
if not valid_files:
    raise ValueError(
        f"No '.tfrecords' objects found under gs://{BUCKET_NAME}/{valid_prefix}"
    )

# Stage 1: a dataset of file names.
valid_dataset = tf.data.Dataset.from_tensor_slices(valid_files).prefetch(
    tf.data.AUTOTUNE,
)

# Stage 2: read the files in parallel through train_utils.full_parse (output
# order is not deterministic), parse each record with
# feature_sets.parse_tfrecord, batch, prefetch, and attach the shared options.
valid_dataset = valid_dataset.interleave(
    train_utils.full_parse,
    num_parallel_calls=tf.data.AUTOTUNE,
    cycle_length=tf.data.AUTOTUNE,
    deterministic=False,
).map(
    feature_sets.parse_tfrecord,
    num_parallel_calls=tf.data.AUTOTUNE
).batch(
    batch_size
).prefetch(
    tf.data.AUTOTUNE,
).with_options(
    options
)

# valid_dataset = valid_dataset.cache() #1gb machine mem + 400 MB in candidate ds (src/two-tower.py)

# valid_dataset

#### Create Candidates dataset

In [15]:
# candidate_files = []
# for blob in storage_client.list_blobs(f"{BUCKET_NAME}", prefix=f'{DATA_VERSION}/{CANDIDATE_PREFIX}/'):
#     if '.tfrecords' in blob.name:
#         candidate_files.append(blob.public_url.replace("https://storage.googleapis.com/", "gs://"))

# candidate_dataset = tf.data.Dataset.from_tensor_slices(candidate_files)

# parsed_candidate_dataset = candidate_dataset.interleave(
#     train_utils.full_parse,
#     cycle_length=tf.data.AUTOTUNE, 
#     num_parallel_calls=tf.data.AUTOTUNE,
#     deterministic=False
# ).map(
#     feature_sets.parse_candidate_tfrecord_fn, 
#     num_parallel_calls=tf.data.AUTOTUNE
# ).with_options(
#     options
# )

# parsed_candidate_dataset = parsed_candidate_dataset.cache() #400 MB on machine mem
# # parsed_candidate_dataset

In [16]:
# # check dataset output
# for x in parsed_candidate_dataset.batch(1).take(1):
#     pprint(x)

### vocab dictionary

In [17]:
# Load the vocabulary dictionary from the local pickle file. The `with` block
# closes the file even if unpickling raises.
# NOTE: pickle can execute arbitrary code while loading; only load this file
# from a trusted source.
with open('vocab_dict.pkl', 'rb') as filehandler:
    VOCAB_DICT = pkl.load(filehandler)

In [18]:
# Print one vocabulary key per line.
for vocab_key in VOCAB_DICT.keys():
    print(vocab_key)

pl_name_src
track_name_pl
artist_name_pl
album_name_pl
artist_genres_pl
tracks_playlist_titles_pl
track_name_can
artist_name_can
album_name_can
artist_genres_can
track_pl_titles_can


## Build model

In [19]:
from src.ranking import tf_ranking_model as tfrm
from util import feature_sets, train_utils, test_instances

In [20]:
# USE_CROSS_LAYER = True

# Settings for the model built below.
SEED = 1234
MAX_PLAYLIST_LENGTH = 5
EMBEDDING_DIM = 128
PROJECTION_DIM = 25

# Dropout switch and rate, token cap, and layer sizes.
USE_DROPOUT = True
DROPOUT_RATE = 0.33
MAX_TOKENS = 20_000
LAYER_SIZES = [256, 64]

# Learning rate and an Adagrad optimizer built from it.
# NOTE(review): `opt` is not referenced by any other cell visible in this
# notebook -- confirm whether it is still needed.
LR = .1
opt = tf.keras.optimizers.Adagrad(LR)

In [22]:
# Instantiate the trainable ranking model from the vocabulary dictionary and
# the hyperparameter constants.
model = tfrm.TheRankingModel(
    layer_sizes=LAYER_SIZES,
    vocab_dict=VOCAB_DICT,
    embedding_dim=EMBEDDING_DIM,
    projection_dim=PROJECTION_DIM,
    seed=SEED,
    use_dropout=USE_DROPOUT,
    dropout_rate=DROPOUT_RATE,
    max_tokens=MAX_TOKENS,
)

In [23]:
# Compile with a fresh Adagrad optimizer, taking the learning rate from the
# shared `LR` constant instead of repeating the literal 0.1.
model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=LR))

In [24]:
# Fit for two epochs on the validation dataset (the train dataset cell above is commented out).
model.fit(valid_dataset, epochs=2) # valid_dataset | train_dataset

Epoch 1/2


2023-06-28 23:33:39.478592: E tensorflow/core/common_runtime/gpu/gpu_cudamallocasync_allocator.cc:288] gpu_async_0 cuMemAllocAsync failed to allocate 1151775744 bytes: CUDA error: out of memory (CUDA_ERROR_OUT_OF_MEMORY)
 Reported by CUDA: Free memory/Total memory: 200802304/15634661376
2023-06-28 23:33:39.478648: E tensorflow/core/common_runtime/gpu/gpu_cudamallocasync_allocator.cc:293] Stats: Limit:                      2153250816
InUse:                      2712671445
MaxInUse:                   2712671449
NumAllocs:                         229
MaxAllocSize:               1151775744
Reserved:                            0
PeakReserved:                        0
LargestFreeBlock:                    0

2023-06-28 23:33:39.478663: E tensorflow/core/common_runtime/gpu/gpu_cudamallocasync_allocator.cc:56] Histogram of current allocation: (allocation_size_in_bytes, nb_allocation_of_that_sizes), ...;
2023-06-28 23:33:39.478672: E tensorflow/core/common_runtime/gpu/gpu_cudamallocasync_allocat

ResourceExhaustedError: in user code:

    File "/opt/conda/lib/python3.10/site-packages/keras/engine/training.py", line 1249, in train_function  *
        return step_function(self, iterator)
    File "/opt/conda/lib/python3.10/site-packages/keras/engine/training.py", line 1233, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/opt/conda/lib/python3.10/site-packages/keras/engine/training.py", line 1222, in run_step  **
        outputs = model.train_step(data)
    File "/opt/conda/lib/python3.10/site-packages/tensorflow_recommenders/models/base.py", line 68, in train_step
        loss = self.compute_loss(inputs, training=True)
    File "/home/jupyter/spotify_mpd_two_tower/src/ranking/tf_ranking_model.py", line 995, in compute_loss
        rating_predictions = self(features)
    File "/opt/conda/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None

    ResourceExhaustedError: Exception encountered when calling layer 'the_ranking_model' (type TheRankingModel).
    
    in user code:
    
        File "/home/jupyter/spotify_mpd_two_tower/src/ranking/tf_ranking_model.py", line 958, in call  *
            return self.ranking_model(features)
        File "/opt/conda/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 70, in error_handler  **
            raise e.with_traceback(filtered_tb) from None
    
        ResourceExhaustedError: Exception encountered when calling layer 'ranking_model' (type RankingModel).
        
        in user code:
        
            File "/home/jupyter/spotify_mpd_two_tower/src/ranking/tf_ranking_model.py", line 856, in call  *
                all_embs = tf.concat(
            File "/opt/conda/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 70, in error_handler  **
                raise e.with_traceback(filtered_tb) from None
            File "/opt/conda/lib/python3.10/site-packages/keras/backend.py", line 2100, in random_uniform
                return tf.random.stateless_uniform(
        
            ResourceExhaustedError: Exception encountered when calling layer 'track_uri_can_emb_model' (type Sequential).
            
            {{function_node __wrapped__AddV2_device_/job:localhost/replica:0/task:0/device:GPU:0}} failed to allocate memory [Op:AddV2]
            
            Call arguments received by layer 'track_uri_can_emb_model' (type Sequential):
              • inputs=tf.Tensor(shape=(None,), dtype=string)
              • training=None
              • mask=None
        
        
        Call arguments received by layer 'ranking_model' (type RankingModel):
          • data={'album_name_can': 'tf.Tensor(shape=(None,), dtype=string)', 'album_name_pl': 'tf.Tensor(shape=(None, 5), dtype=string)', 'album_uri_can': 'tf.Tensor(shape=(None,), dtype=string)', 'album_uri_pl': 'tf.Tensor(shape=(None, 5), dtype=string)', 'artist_followers_can': 'tf.Tensor(shape=(None,), dtype=float32)', 'artist_genres_can': 'tf.Tensor(shape=(None,), dtype=string)', 'artist_genres_pl': 'tf.Tensor(shape=(None, 5), dtype=string)', 'artist_name_can': 'tf.Tensor(shape=(None,), dtype=string)', 'artist_name_pl': 'tf.Tensor(shape=(None, 5), dtype=string)', 'artist_pop_can': 'tf.Tensor(shape=(None,), dtype=float32)', 'artist_pop_pl': 'tf.Tensor(shape=(None, 5), dtype=float32)', 'artist_uri_can': 'tf.Tensor(shape=(None,), dtype=string)', 'artist_uri_pl': 'tf.Tensor(shape=(None, 5), dtype=string)', 'artists_followers_pl': 'tf.Tensor(shape=(None, 5), dtype=float32)', 'duration_ms_can': 'tf.Tensor(shape=(None,), dtype=float32)', 'duration_ms_songs_pl': 'tf.Tensor(shape=(None, 5), dtype=float32)', 'num_pl_albums_new': 'tf.Tensor(shape=(None,), dtype=float32)', 'num_pl_artists_new': 'tf.Tensor(shape=(None,), dtype=float32)', 'num_pl_songs_new': 'tf.Tensor(shape=(None,), dtype=float32)', 'pl_collaborative_src': 'tf.Tensor(shape=(None,), dtype=string)', 'pl_duration_ms_new': 'tf.Tensor(shape=(None,), dtype=float32)', 'pl_name_src': 'tf.Tensor(shape=(None,), dtype=string)', 'track_acousticness_can': 'tf.Tensor(shape=(None,), dtype=float32)', 'track_acousticness_pl': 'tf.Tensor(shape=(None, 5), dtype=float32)', 'track_danceability_can': 'tf.Tensor(shape=(None,), dtype=float32)', 'track_danceability_pl': 'tf.Tensor(shape=(None, 5), dtype=float32)', 'track_energy_can': 'tf.Tensor(shape=(None,), dtype=float32)', 'track_energy_pl': 'tf.Tensor(shape=(None, 5), dtype=float32)', 'track_instrumentalness_can': 'tf.Tensor(shape=(None,), dtype=float32)', 'track_instrumentalness_pl': 'tf.Tensor(shape=(None, 5), dtype=float32)', 'track_key_can': 'tf.Tensor(shape=(None,), 
dtype=string)', 'track_key_pl': 'tf.Tensor(shape=(None, 5), dtype=string)', 'track_liveness_can': 'tf.Tensor(shape=(None,), dtype=float32)', 'track_liveness_pl': 'tf.Tensor(shape=(None, 5), dtype=float32)', 'track_loudness_can': 'tf.Tensor(shape=(None,), dtype=float32)', 'track_loudness_pl': 'tf.Tensor(shape=(None, 5), dtype=float32)', 'track_mode_can': 'tf.Tensor(shape=(None,), dtype=string)', 'track_mode_pl': 'tf.Tensor(shape=(None, 5), dtype=string)', 'track_name_can': 'tf.Tensor(shape=(None,), dtype=string)', 'track_name_pl': 'tf.Tensor(shape=(None, 5), dtype=string)', 'track_pop_can': 'tf.Tensor(shape=(None,), dtype=float32)', 'track_pop_pl': 'tf.Tensor(shape=(None, 5), dtype=float32)', 'track_speechiness_can': 'tf.Tensor(shape=(None,), dtype=float32)', 'track_speechiness_pl': 'tf.Tensor(shape=(None, 5), dtype=float32)', 'track_tempo_can': 'tf.Tensor(shape=(None,), dtype=float32)', 'track_tempo_pl': 'tf.Tensor(shape=(None, 5), dtype=float32)', 'track_time_signature_can': 'tf.Tensor(shape=(None,), dtype=string)', 'track_time_signature_pl': 'tf.Tensor(shape=(None, 5), dtype=string)', 'track_uri_can': 'tf.Tensor(shape=(None,), dtype=string)', 'track_uri_pl': 'tf.Tensor(shape=(None, 5), dtype=string)', 'track_valence_can': 'tf.Tensor(shape=(None,), dtype=float32)', 'track_valence_pl': 'tf.Tensor(shape=(None, 5), dtype=float32)'}
    
    
    Call arguments received by layer 'the_ranking_model' (type TheRankingModel):
      • features={'album_name_can': 'tf.Tensor(shape=(None,), dtype=string)', 'album_name_pl': 'tf.Tensor(shape=(None, 5), dtype=string)', 'album_uri_can': 'tf.Tensor(shape=(None,), dtype=string)', 'album_uri_pl': 'tf.Tensor(shape=(None, 5), dtype=string)', 'artist_followers_can': 'tf.Tensor(shape=(None,), dtype=float32)', 'artist_genres_can': 'tf.Tensor(shape=(None,), dtype=string)', 'artist_genres_pl': 'tf.Tensor(shape=(None, 5), dtype=string)', 'artist_name_can': 'tf.Tensor(shape=(None,), dtype=string)', 'artist_name_pl': 'tf.Tensor(shape=(None, 5), dtype=string)', 'artist_pop_can': 'tf.Tensor(shape=(None,), dtype=float32)', 'artist_pop_pl': 'tf.Tensor(shape=(None, 5), dtype=float32)', 'artist_uri_can': 'tf.Tensor(shape=(None,), dtype=string)', 'artist_uri_pl': 'tf.Tensor(shape=(None, 5), dtype=string)', 'artists_followers_pl': 'tf.Tensor(shape=(None, 5), dtype=float32)', 'duration_ms_can': 'tf.Tensor(shape=(None,), dtype=float32)', 'duration_ms_songs_pl': 'tf.Tensor(shape=(None, 5), dtype=float32)', 'num_pl_albums_new': 'tf.Tensor(shape=(None,), dtype=float32)', 'num_pl_artists_new': 'tf.Tensor(shape=(None,), dtype=float32)', 'num_pl_songs_new': 'tf.Tensor(shape=(None,), dtype=float32)', 'pl_collaborative_src': 'tf.Tensor(shape=(None,), dtype=string)', 'pl_duration_ms_new': 'tf.Tensor(shape=(None,), dtype=float32)', 'pl_name_src': 'tf.Tensor(shape=(None,), dtype=string)', 'track_acousticness_can': 'tf.Tensor(shape=(None,), dtype=float32)', 'track_acousticness_pl': 'tf.Tensor(shape=(None, 5), dtype=float32)', 'track_danceability_can': 'tf.Tensor(shape=(None,), dtype=float32)', 'track_danceability_pl': 'tf.Tensor(shape=(None, 5), dtype=float32)', 'track_energy_can': 'tf.Tensor(shape=(None,), dtype=float32)', 'track_energy_pl': 'tf.Tensor(shape=(None, 5), dtype=float32)', 'track_instrumentalness_can': 'tf.Tensor(shape=(None,), dtype=float32)', 'track_instrumentalness_pl': 'tf.Tensor(shape=(None, 5), dtype=float32)', 'track_key_can': 'tf.Tensor(shape=(None,), 
dtype=string)', 'track_key_pl': 'tf.Tensor(shape=(None, 5), dtype=string)', 'track_liveness_can': 'tf.Tensor(shape=(None,), dtype=float32)', 'track_liveness_pl': 'tf.Tensor(shape=(None, 5), dtype=float32)', 'track_loudness_can': 'tf.Tensor(shape=(None,), dtype=float32)', 'track_loudness_pl': 'tf.Tensor(shape=(None, 5), dtype=float32)', 'track_mode_can': 'tf.Tensor(shape=(None,), dtype=string)', 'track_mode_pl': 'tf.Tensor(shape=(None, 5), dtype=string)', 'track_name_can': 'tf.Tensor(shape=(None,), dtype=string)', 'track_name_pl': 'tf.Tensor(shape=(None, 5), dtype=string)', 'track_pop_can': 'tf.Tensor(shape=(None,), dtype=float32)', 'track_pop_pl': 'tf.Tensor(shape=(None, 5), dtype=float32)', 'track_speechiness_can': 'tf.Tensor(shape=(None,), dtype=float32)', 'track_speechiness_pl': 'tf.Tensor(shape=(None, 5), dtype=float32)', 'track_tempo_can': 'tf.Tensor(shape=(None,), dtype=float32)', 'track_tempo_pl': 'tf.Tensor(shape=(None, 5), dtype=float32)', 'track_time_signature_can': 'tf.Tensor(shape=(None,), dtype=string)', 'track_time_signature_pl': 'tf.Tensor(shape=(None, 5), dtype=string)', 'track_uri_can': 'tf.Tensor(shape=(None,), dtype=string)', 'track_uri_pl': 'tf.Tensor(shape=(None, 5), dtype=string)', 'track_valence_can': 'tf.Tensor(shape=(None,), dtype=float32)', 'track_valence_pl': 'tf.Tensor(shape=(None, 5), dtype=float32)'}


In [26]:
# Evaluate on the validation dataset and return the metrics as a dict.
model.evaluate(valid_dataset, return_dict=True)



{'root_mean_squared_error': 0.08815838396549225,
 'loss': 0.012934082187712193,
 'regularization_loss': 0,
 'total_loss': 0.012934082187712193}

In [22]:
# Build a standalone tfrm.RankingModel so it can be inspected and called
# directly. A later cell calls `rank_model(...)`, so this construction has to
# be live code for the notebook to run top to bottom on a fresh kernel.
rank_model = tfrm.RankingModel(
    layer_sizes=LAYER_SIZES,
    vocab_dict=VOCAB_DICT,
    embedding_dim=EMBEDDING_DIM,
    projection_dim=PROJECTION_DIM,
    seed=SEED,
    use_dropout=USE_DROPOUT,
    dropout_rate=DROPOUT_RATE,
    max_tokens=MAX_TOKENS,
)

# List the model's layers by index and name.
for i, l in enumerate(rank_model.layers):
    print(i, l.name)

0 pl_name_src_text_embedding
1 pl_collaborative_emb_model
2 pl_duration_ms_new_emb_model
3 num_pl_songs_new_emb_model
4 num_pl_artists_new_emb_model
5 num_pl_albums_new_emb_model
6 track_uri_pl_emb_model
7 track_name_pl_emb_model
8 artist_uri_pl_emb_model
9 artist_name_pl_emb_model
10 album_uri_pl_emb_model
11 album_name_pl_emb_model
12 artist_genres_pl_emb_model
13 duration_ms_songs_pl_emb_model
14 track_pop_pl_emb_model
15 artist_pop_pl_emb_model
16 artists_followers_pl_emb_model
17 track_danceability_pl_emb_model
18 track_energy_pl_emb_model
19 track_key_pl_emb_model
20 track_loudness_pl_emb_model
21 track_mode_pl_emb_model
22 track_speechiness_pl_emb_model
23 track_acousticness_pl_emb_model
24 track_instrumentalness_pl_emb_model
25 track_liveness_pl_emb_model
26 track_valence_pl_emb_model
27 track_tempo_pl_emb_model
28 time_signature_pl_emb_model
29 sequential


In [33]:
# print(x for x in instances)

# test = {
#     'pl_name_src': ['Capoeira']
#     , 'pl_collaborative_src': ['false']
#     , 'pl_duration_ms_new': [17971314.0]
#     , 'num_pl_songs_new': [85.0]
#     , 'num_pl_artists_new': [5.0]
#     , 'num_pl_albums_new': [9.0]
# }

# test

# A single hand-written example covering every model input feature.
# Every value carries a leading batch dimension of 1: `*_can`, `*_src` and
# `*_new` features are 1-element lists, `*_pl` features are a 1-element list
# holding the 5 playlist entries. Keys follow the feature names the model
# receives from the dataset (e.g. `track_time_signature_can`).
test_full = {
    'album_name_can': ['Capoeira Electronica'],
    'album_name_pl': [['Odilara', 'Capoeira Electronica', 'Capoeira Ultimate', 'Festa Popular', 'Capoeira Electronica']],
    'album_uri_can': ['spotify:album:2FsSSHGt8JM0JgRy6ZX3kR'],
    'album_uri_pl': [[
        'spotify:album:4Y8RfvZzCiApBCIZswj9Ry',
        'spotify:album:2FsSSHGt8JM0JgRy6ZX3kR',
        'spotify:album:55HHBqZ2SefPeaENOgWxYK',
        'spotify:album:150L1V6UUT7fGUI3PbxpkE',
        'spotify:album:2FsSSHGt8JM0JgRy6ZX3kR'
    ]],
    'artist_followers_can': [5170.0],
    'artist_genres_can': ['capoeira'],
    # Nested like every other playlist feature (was a flat list without the batch dimension).
    'artist_genres_pl': [['samba moderno', 'capoeira', 'capoeira', 'NONE', 'capoeira']],
    # Wrapped in a list like every other candidate feature (was a bare string).
    'artist_name_can': ['Capoeira Experience'],
    'artist_name_pl': [['Odilara', 'Capoeira Experience', 'Denis Porto', 'Zambe', 'Capoeira Experience']],
    'artist_pop_can': [24.0],
    'artist_pop_pl': [[4., 24., 2., 0., 24.]],
    # Wrapped in a list like every other candidate feature (was a bare string).
    'artist_uri_can': ['spotify:artist:5SKEXbgzIdRl3gQJ23CnUP'],
    'artist_uri_pl': [[
        'spotify:artist:72oameojLOPWYB7nB8rl6c',
        'spotify:artist:5SKEXbgzIdRl3gQJ23CnUP',
        'spotify:artist:67p5GMYQZOgaAfx1YyttQk',
        'spotify:artist:4fH3OXCRcPsaHFE5KhgqZS',
        'spotify:artist:5SKEXbgzIdRl3gQJ23CnUP'
    ]],
    'artists_followers_pl': [[316., 5170., 448., 19., 5170.]],
    'duration_ms_can': [192640.0],
    'duration_ms_songs_pl': [[234612., 226826., 203480., 287946., 271920.]],
    'num_pl_albums_new': [9.0],
    'num_pl_artists_new': [5.0],
    'num_pl_songs_new': [85.0],
    'pl_collaborative_src': ['false'],
    'pl_duration_ms_new': [17971314.0],
    'pl_name_src': ['Capoeira'],
    # Key renamed from 'time_signature_can' to match the dataset feature name.
    'track_time_signature_can': ['4'],
    'track_time_signature_pl': [['4', '4', '4', '4', '4']],
    'track_acousticness_can': [0.478],
    'track_acousticness_pl': [[0.238, 0.105, 0.0242, 0.125, 0.304]],
    'track_danceability_can': [0.709],
    'track_danceability_pl': [[0.703, 0.712, 0.806, 0.529, 0.821]],
    'track_energy_can': [0.742],
    'track_energy_pl': [[0.743, 0.41, 0.794, 0.776, 0.947]],
    'track_instrumentalness_can': [0.00297],
    'track_instrumentalness_pl': [[4.84e-06, 4.30e-01, 7.42e-04, 4.01e-01, 5.07e-03]],
    'track_key_can': ['0'],
    'track_key_pl': [['5', '0', '1', '10', '10']],
    'track_liveness_can': [0.0346],
    'track_liveness_pl': [[0.128, 0.0725, 0.191, 0.105, 0.0552]],
    'track_loudness_can': [-7.295],
    'track_loudness_pl': [[-8.638, -8.754, -9.084, -7.04, -6.694]],
    'track_mode_can': ['1'],
    'track_mode_pl': [['0', '1', '1', '0', '1']],
    'track_name_can': ['Bezouro Preto - Studio'],
    'track_name_pl': [['O Telefone Tocou Novamente', 'Bem Devagar - Studio', 'Angola Dream', 'Janaina', 'Louco Berimbau - Studio']],
    'track_pop_can': [3.0],
    'track_pop_pl': [[5., 1., 0., 0., 1.]],
    'track_speechiness_can': [0.0802],
    'track_speechiness_pl': [[0.0367, 0.0272, 0.0407, 0.132, 0.0734]],
    'track_tempo_can': [172.238],
    'track_tempo_pl': [[100.039, 89.089, 123.999, 119.963, 119.214]],
    'track_uri_can': ['spotify:track:0tlhK4OvpHCYpReTABvKFb'],
    'track_uri_pl': [[
        'spotify:track:1pQkOdcTDfLr84TDCrmGy7',
        'spotify:track:39grEDsAHAjmo2QFo4G8D9',
        'spotify:track:5vxSLdJXqbKYH487YO8LSL',
        'spotify:track:6T9GbmZ6voDM4aTBsG5VDh',
        'spotify:track:7ELt9eslVvWo276pX2garN'
    ]],
    'track_valence_can': [0.844],
    'track_valence_pl': [[0.966, 0.667, 0.696, 0.876, 0.655]],
}

In [34]:
# rank_model(test)
# Score the hand-written example.
# NOTE(review): requires `rank_model` (a tfrm.RankingModel) to exist in the
# kernel -- confirm the cell that constructs it has been run.
rank_model(test_full)



<tf.Tensor: shape=(1, 1), dtype=float32, numpy=array([[0.00134374]], dtype=float32)>

In [None]:
# features['pl_name_src']
# features['pl_collaborative_src']
# features["pl_duration_ms_new"]
# features["num_pl_songs_new"]
# features["num_pl_artists_new"]
# features["num_pl_albums_new"]
# features['track_uri_pl']
# features['track_name_pl'
# features["artist_uri_pl"]
# features["artist_name_pl"
# features["album_uri_pl"]
# features["album_name_pl"
# features["artist_genres_pl"
# features["duration_ms_songs_pl"]
# features["track_pop_pl"]
# features["artist_pop_pl"]
# features["artists_followers_pl"]
# features["track_danceability_pl"]
# features["track_energy_pl"]
# features["track_key_pl"]
# features["track_loudness_pl"]
# features["track_mode_pl"]
# features["track_speechiness_pl"]
# features["track_acousticness_pl"]
# features["track_instrumentalness_pl"]
# features["track_liveness_pl"]
# features["track_valence_pl"]
# features["track_tempo_pl"]
# features["track_time_signature_pl"]

# #### candidates
# , features['track_uri_can']
# , features['track_name_can']
# , features['artist_uri_can']
# , features['artist_name_can']
# , features['album_uri_can']
# , features['album_name_can']
# , features['duration_ms_can']
# , features['track_pop_can']
# , features['artist_pop_can']
# , features['artist_genres_can']
# , features['artist_followers_can']
# , features['track_danceability_can']
# , features['track_energy_can']
# , features['track_key_can']
# , features['track_loudness_can']
# , features['track_mode_can']
# , features['track_speechiness_can']
# , features['track_acousticness_can']
# , features['track_instrumentalness_can']
# , features['track_liveness_can']
# , features['track_valence_can']
# , features['track_tempo_can']
# , features['track_time_signature_can']

In [23]:
# Display the vocabulary stored for the `pl_name_src` feature.
VOCAB_DICT['pl_name_src']

['',
 '[UNK]',
 'country',
 'music',
 'rock',
 'chill',
 'summer',
 'party',
 'songs',
 'good',
 'jams',
 'rap',
 'playlist',
 'the',
 'new',
 'my',
 'oldies',
 'old',
 'christmas',
 'mix',
 'workout',
 '2017',
 'vibes',
 'throwback',
 '2016',
 'car',
 'classic',
 'work',
 '90s',
 'school',
 'road',
 'worship',
 '2015',
 'lit',
 'love',
 'dance',
 'feels',
 'stuff',
 'up',
 'hop',
 'pop',
 'best',
 'hip',
 'out',
 '80s',
 'it',
 'trip',
 'throwbacks',
 'wedding',
 'edm',
 'hip hop',
 'classics',
 'disney',
 'of',
 'time',
 'road trip',
 'old school',
 'classic rock',
 'fall',
 'gym',
 'tunes',
 'slow',
 'christian',
 'random',
 'jamz',
 '17',
 'spring',
 'happy',
 'alternative',
 'all',
 'for',
 'rb',
 'run',
 'feel',
 'running',
 'study',
 'indie',
 'i',
 'me',
 '2014',
 'hype',
 'driving',
 'favorites',
 'spanish',
 'feel good',
 'house',
 'day',
 '2',
 '16',
 'sleep',
 'back',
 'beach',
 'but',
 'you',
 'goodies',
 'chillin',
 'sad',
 'mellow',
 'hits',
 'my music',
 'roadtrip',
 'g

In [None]:
# Ranking task with a mean-squared-error loss and an RMSE metric.
task = tfrs.tasks.Ranking(
    loss=tf.keras.losses.MeanSquaredError(),
    metrics=[tf.keras.metrics.RootMeanSquaredError()],
)

In [None]:
# NOTE(review): this binds `model` to the TheRankingModel class itself (no
# call, no arguments), replacing the instance built earlier in the notebook
# -- confirm this is intended.
model = tfrm.TheRankingModel

# Matching Engine - tests

In [52]:
from google.cloud import aiplatform as vertex_ai

# Initialise the Vertex AI SDK for this project and region.
vertex_ai.init(project=PROJECT_ID, location=REGION)

import time

In [53]:
# Full resource name of the Matching Engine index endpoint used below.
INDEX_ENDPOINT_URI = 'projects/934903580331/locations/us-central1/indexEndpoints/4438886771006111744' # ann 50e

In [54]:
# Handle to the existing index endpoint identified by INDEX_ENDPOINT_URI; displayed as the cell output.
ME_index_endpoint = vertex_ai.MatchingEngineIndexEndpoint(INDEX_ENDPOINT_URI)
ME_index_endpoint

<google.cloud.aiplatform.matching_engine.matching_engine_index_endpoint.MatchingEngineIndexEndpoint object at 0x7f91a38304c0> 
resource name: projects/934903580331/locations/us-central1/indexEndpoints/4438886771006111744

In [55]:
# Take the ID of the first index deployed to the endpoint.
# NOTE(review): indexing [0] assumes at least one deployed index -- confirm.
DEPLOYED_INDEX_ID = ME_index_endpoint.deployed_indexes[0].id
print(f"DEPLOYED_INDEX_ID: {DEPLOYED_INDEX_ID}")

# Display the full list of deployed indexes.
ME_index_endpoint.deployed_indexes

DEPLOYED_INDEX_ID: deployedann_e2e_v1


[id: "deployedann_e2e_v1"
index: "projects/934903580331/locations/us-central1/indexes/68706282596466688"
create_time {
  seconds: 1681232232
  nanos: 385340000
}
private_endpoints {
  match_grpc_address: "10.41.2.5"
}
index_sync_time {
  seconds: 1687983053
  nanos: 643665000
}
automatic_resources {
  min_replica_count: 2
  max_replica_count: 2
}
deployment_group: "default"
]

In [56]:
# Full resource name of the Vertex AI endpoint, and a handle to it.
ENDPOINT_URI = 'projects/934903580331/locations/us-central1/endpoints/3302875755234459648'
model_endpoint = vertex_ai.Endpoint(ENDPOINT_URI)
model_endpoint

<google.cloud.aiplatform.models.Endpoint object at 0x7f91a3833790> 
resource name: projects/934903580331/locations/us-central1/endpoints/3302875755234459648

In [57]:
# Print the first deployed model recorded on the endpoint resource.
print(model_endpoint.gca_resource.deployed_models[0])

id: "4612884486101663744"
model: "projects/934903580331/locations/us-central1/models/8584575572326219776"
display_name: "query-tower-sp-2tower-tfrs-e2e-v1-pipev4"
create_time {
  seconds: 1681222077
  nanos: 256958000
}
dedicated_resources {
  machine_spec {
    machine_type: "n1-standard-16"
  }
  min_replica_count: 1
  max_replica_count: 1
}
service_account: "notebooksa@hybrid-vertex.iam.gserviceaccount.com"
model_version_id: "1"



In [58]:
# Example request instance from util.test_instances, with 5 entries per
# playlist feature (MAX_PLAYLIST_LENGTH --> 5).
TEST_INSTANCE_5 = test_instances.TEST_INSTANCE_5
TEST_INSTANCE_5

{'album_name_can': 'Capoeira Electronica',
 'album_name_pl': ['Odilara',
  'Capoeira Electronica',
  'Capoeira Ultimate',
  'Festa Popular',
  'Capoeira Electronica'],
 'album_uri_can': 'spotify:album:2FsSSHGt8JM0JgRy6ZX3kR',
 'album_uri_pl': ['spotify:album:4Y8RfvZzCiApBCIZswj9Ry',
  'spotify:album:2FsSSHGt8JM0JgRy6ZX3kR',
  'spotify:album:55HHBqZ2SefPeaENOgWxYK',
  'spotify:album:150L1V6UUT7fGUI3PbxpkE',
  'spotify:album:2FsSSHGt8JM0JgRy6ZX3kR'],
 'artist_followers_can': 5170.0,
 'artist_genres_can': 'capoeira',
 'artist_genres_pl': ['samba moderno',
  'capoeira',
  'capoeira',
  'NONE',
  'capoeira'],
 'artist_name_can': 'Capoeira Experience',
 'artist_name_pl': ['Odilara',
  'Capoeira Experience',
  'Denis Porto',
  'Zambe',
  'Capoeira Experience'],
 'artist_pop_can': 24.0,
 'artist_pop_pl': [4.0, 24.0, 2.0, 0.0, 24.0],
 'artist_uri_can': 'spotify:artist:5SKEXbgzIdRl3gQJ23CnUP',
 'artist_uri_pl': ['spotify:artist:72oameojLOPWYB7nB8rl6c',
  'spotify:artist:5SKEXbgzIdRl3gQJ23CnUP',


In [60]:
# Send the single test instance to the deployed endpoint and show the prediction it returns.
playlist_emb = model_endpoint.predict([TEST_INSTANCE_5])
playlist_emb

Prediction(predictions=[[2.13599348, 0.665572762, 0.306572795, -0.642993093, 0.905611515, -0.451913774, -1.04571533, -1.47509861, 0.667647898, 1.392694, -0.580560744, 2.1635685, 1.15477574, 0.890988827, 0.987143219, -0.344569176, -0.727035463, -0.337408423, -1.34015453, -1.03236663, -0.695001364, -0.106723189, 0.0395636633, 1.66761637, 0.981151044, 0.879824638, 0.642675459, 2.18829846, -1.37034965, 0.116360746, -0.356807619, -0.00551904133, -1.61825109, 1.00126624, -0.543757379, -0.640756607, 0.584731758, -1.26177561, 1.33843577, 0.260981917, 0.691716552, 0.555235922, 0.835755944, 0.242919102, 0.508965909, -0.551774085, -0.752674818, 1.07970881, -0.278400809, 1.20874846, 0.634759903, 0.753106713, -0.331600159, -0.451509297, -0.44077149, -0.706078768, 0.518127501, 0.231186, 1.1358285, -0.811849892, -0.790187061, 0.0396273322, 0.101898566, -1.96892595, 0.238233238, -0.289123625, 0.353820384, 0.371209621, 1.90042961, -0.360776, 0.874980628, 0.49637109, -0.448792934, -0.249126494, -1.58931

In [68]:
# Length of the first (only) prediction vector.
len(playlist_emb.predictions[0])

128

In [72]:
import numpy as np 

# `original` is the first prediction vector returned by the endpoint.
original = playlist_emb.predictions[0]
# Element-wise negation of the vector. Converting to a float array first works
# for any vector length, instead of multiplying by a hard-coded np.ones(128).
opposite = -np.asarray(original, dtype=float)
opposite

array([-2.13599348, -0.66557276, -0.3065728 ,  0.64299309, -0.90561152,
        0.45191377,  1.04571533,  1.47509861, -0.6676479 , -1.392694  ,
        0.58056074, -2.1635685 , -1.15477574, -0.89098883, -0.98714322,
        0.34456918,  0.72703546,  0.33740842,  1.34015453,  1.03236663,
        0.69500136,  0.10672319, -0.03956366, -1.66761637, -0.98115104,
       -0.87982464, -0.64267546, -2.18829846,  1.37034965, -0.11636075,
        0.35680762,  0.00551904,  1.61825109, -1.00126624,  0.54375738,
        0.64075661, -0.58473176,  1.26177561, -1.33843577, -0.26098192,
       -0.69171655, -0.55523592, -0.83575594, -0.2429191 , -0.50896591,
        0.55177409,  0.75267482, -1.07970881,  0.27840081, -1.20874846,
       -0.6347599 , -0.75310671,  0.33160016,  0.4515093 ,  0.44077149,
        0.70607877, -0.5181275 , -0.231186  , -1.1358285 ,  0.81184989,
        0.79018706, -0.03962733, -0.10189857,  1.96892595, -0.23823324,
        0.28912362, -0.35382038, -0.37120962, -1.90042961,  0.36

In [73]:
# Display the un-negated vector for comparison.
original

[2.13599348,
 0.665572762,
 0.306572795,
 -0.642993093,
 0.905611515,
 -0.451913774,
 -1.04571533,
 -1.47509861,
 0.667647898,
 1.392694,
 -0.580560744,
 2.1635685,
 1.15477574,
 0.890988827,
 0.987143219,
 -0.344569176,
 -0.727035463,
 -0.337408423,
 -1.34015453,
 -1.03236663,
 -0.695001364,
 -0.106723189,
 0.0395636633,
 1.66761637,
 0.981151044,
 0.879824638,
 0.642675459,
 2.18829846,
 -1.37034965,
 0.116360746,
 -0.356807619,
 -0.00551904133,
 -1.61825109,
 1.00126624,
 -0.543757379,
 -0.640756607,
 0.584731758,
 -1.26177561,
 1.33843577,
 0.260981917,
 0.691716552,
 0.555235922,
 0.835755944,
 0.242919102,
 0.508965909,
 -0.551774085,
 -0.752674818,
 1.07970881,
 -0.278400809,
 1.20874846,
 0.634759903,
 0.753106713,
 -0.331600159,
 -0.451509297,
 -0.44077149,
 -0.706078768,
 0.518127501,
 0.231186,
 1.1358285,
 -0.811849892,
 -0.790187061,
 0.0396273322,
 0.101898566,
 -1.96892595,
 0.238233238,
 -0.289123625,
 0.353820384,
 0.371209621,
 1.90042961,
 -0.360776,
 0.874980628,
 0

In [74]:
# ID of the deployed index to query.
# NOTE(review): same value as the DEPLOYED_INDEX_ID printed earlier from the
# endpoint -- confirm whether this hard-coded copy is needed.
DEPLOYED_ANN_INDEX_ID = 'deployedann_e2e_v1'

In [78]:
# Query the deployed index with the original vector and ask for 10 neighbors.
ANN_response = ME_index_endpoint.match(
    deployed_index_id=DEPLOYED_ANN_INDEX_ID,
    queries=[original],
    num_neighbors=10
)
# ANN_response

In [79]:
# Display the neighbors returned for the original vector.
ANN_response

[[MatchNeighbor(id='spotify:track:5ujxgPve1zpywKhdhprjd7', distance=-68.44378662109375),
  MatchNeighbor(id='spotify:track:4IkKAbJxc6XMt6dP0Wxn9q', distance=-69.26606750488281),
  MatchNeighbor(id='spotify:track:77sA6Lhq1aHyeiDn0Ezynk', distance=-69.32096862792969),
  MatchNeighbor(id='spotify:track:3oTZEx1KVuCaGnDwOaluRv', distance=-69.49063873291016),
  MatchNeighbor(id='spotify:track:2GclgkNkaCOgauDyGSgn5O', distance=-69.74806213378906),
  MatchNeighbor(id='spotify:track:4djyU5pTSNYq2ZuBFRia7I', distance=-69.94599914550781),
  MatchNeighbor(id='spotify:track:0nhk50UU12qwXp3ap9KZH5', distance=-70.12556457519531),
  MatchNeighbor(id='spotify:track:6wL1b6VtUdmddjXzpURDBr', distance=-70.27442932128906),
  MatchNeighbor(id='spotify:track:7oKRbhVq8AqvnfIrSmh6bN', distance=-70.29933166503906),
  MatchNeighbor(id='spotify:track:2wb8SJRrCx4k7bgTk0HPXU', distance=-70.32716369628906)]]

In [80]:
# Same query as for the original vector, but using the negated vector.
ANN_response_v2 = ME_index_endpoint.match(
    deployed_index_id=DEPLOYED_ANN_INDEX_ID,
    queries=[opposite],
    num_neighbors=10
)
# ANN_response_v2

In [81]:
# Display the neighbors returned for the negated vector.
ANN_response_v2

[[MatchNeighbor(id='spotify:track:2vsNqssAT6DwTr8XMtkvuI', distance=120.07003784179688),
  MatchNeighbor(id='spotify:track:6FrqntvCqYk6pTloZ5NGcS', distance=119.92662811279297),
  MatchNeighbor(id='spotify:track:1GzygBcsXQW5HVEvIky76a', distance=118.85808563232422),
  MatchNeighbor(id='spotify:track:0rcB5TG1EeMRkZFJDkDtGF', distance=118.80642700195312),
  MatchNeighbor(id='spotify:track:4nr2whZo0rB6blw1BaacZz', distance=117.85485076904297),
  MatchNeighbor(id='spotify:track:2XIuiCqPKof2hJfNzLJ9uW', distance=117.66204833984375),
  MatchNeighbor(id='spotify:track:20EFdLOP1Rm37ha8BLbOYe', distance=117.60924530029297),
  MatchNeighbor(id='spotify:track:0atv7V8rRNHYY9hroCQddh', distance=117.54971313476562),
  MatchNeighbor(id='spotify:track:5JELaD59SYjlNvl47UggH3', distance=117.53295135498047),
  MatchNeighbor(id='spotify:track:5WAyXO575REnpzp56Lm3m9', distance=117.46258544921875)]]