# Two-Tower for Neural Deep Retrieval

> model baseline for spotify modeling experiments

In [1]:
# set variables
SEED = 41781897
PROJECT_ID = 'hybrid-vertex'
BQ_LOCATION='us-central1'

### Pip

In [2]:
!pip freeze | grep tensorflow

tensorflow @ file:///opt/conda/conda-bld/dlenv-tf-2-9-cpu_1656643241567/work/tensorflow-2.9.0rc2-cp37-cp37m-linux_x86_64.whl
tensorflow-cloud==0.1.16
tensorflow-datasets==4.4.0
tensorflow-estimator==2.9.0
tensorflow-hub==0.12.0
tensorflow-io==0.23.1
tensorflow-io-gcs-filesystem==0.26.0
tensorflow-metadata==1.9.0
tensorflow-probability==0.14.1
tensorflow-recommenders==0.6.0
tensorflow-serving-api==2.9.0
tensorflow-transform==1.9.0


### Import Package

In [50]:
import warnings
warnings.filterwarnings("ignore") #do this b/c there's an info-level bug that can safely be ignored

# from tensorflow.python.framework import ops
# from tensorflow.python.framework import dtypes
# from tensorflow_io.bigquery import BigQueryClient
# from tensorflow_io.bigquery import BigQueryReadSession

import json
import tensorflow as tf
import tensorflow_recommenders as tfrs
import datetime
from tensorflow.python.lib.io import file_io
from tensorflow.train import BytesList, Feature, FeatureList, Int64List, FloatList
from tensorflow.train import SequenceExample, FeatureLists

import google.cloud.aiplatform as aiplatform

import os
import numpy as np
import pickle as pkl

from pprint import pprint

In [4]:
import tensorflow as tf
print(tf. __version__)

2.9.0-rc2


## TF-Record reader

In [104]:
SAMPLE_FILE = "gs://spotify-tfrecords-blog/train_v2/spotify-000002-12227.tfrec"
sample_dataset = tf.data.TFRecordDataset(SAMPLE_FILE)

for raw_record in sample_dataset.take(1):
    example = tf.train.Example()
    example.ParseFromString(raw_record.numpy())
    print(example)

# count examples in a TF-record
# cnt = parsed_candidate_ds.reduce(np.int64(0), lambda x, _: x + 1)

features {
  feature {
    key: "album_name_can"
    value {
      bytes_list {
        value: "Peace Is The Mission: Extended"
      }
    }
  }
  feature {
    key: "album_name_seed_track"
    value {
      bytes_list {
        value: "Everything Is 4"
      }
    }
  }
  feature {
    key: "artist_followers_can"
    value {
      float_list {
        value: 6271327.0
      }
    }
  }
  feature {
    key: "artist_followers_seed_track"
    value {
      float_list {
        value: 10917261.0
      }
    }
  }
  feature {
    key: "artist_genres_can"
    value {
      bytes_list {
        value: "\'dance pop\', \'edm\', \'electro house\', \'moombahton\', \'pop\', \'pop dance\', \'pop rap\', \'tropical house\'"
      }
    }
  }
  feature {
    key: "artist_genres_seed_track"
    value {
      bytes_list {
        value: "\'dance pop\', \'pop\', \'pop rap\', \'post-teen pop\'"
      }
    }
  }
  feature {
    key: "artist_name_can"
    value {
      bytes_list {
        value: "Major 

### train (query & candidate features)

In [47]:
from google.cloud import storage

client = storage.Client()

# =========================
# all training data
# =========================
train_files = []

BUCKET_NAME_TRAIN = 'spotify-tfrecords-blog'
OBJ_PATH_TRAIN = 'train_v2'

for blob in client.list_blobs(f'{BUCKET_NAME_TRAIN}', prefix=f'{OBJ_PATH_TRAIN}/', delimiter='/'):
    train_files.append(blob.public_url.replace("https://storage.googleapis.com/", "gs://"))

# =========================
# all candidate data
# =========================
candidate_files = []

BUCKET_NAME_CANDIDATE = 'spotify-tfrecords-blog'
OBJ_PATH_CANDIDATE = 'candidate_tracks_v2'

for blob in client.list_blobs(f'{BUCKET_NAME_CANDIDATE}', prefix=f'{OBJ_PATH_CANDIDATE}/', delimiter='/'):
    candidate_files.append(blob.public_url.replace("https://storage.googleapis.com/", "gs://"))
    
print(f"Length of train_files: {len(train_files)}")
print(f"Length of candidate_files: {len(candidate_files)}")

print(f"Example of train_files: {train_files[0]}")
print(f"Example of candidate_files: {candidate_files[0]}")

Length of train_files: 2025
Length of candidate_files: 184
Example of train_files: gs://spotify-tfrecords-blog/train_v2/spotify-000000-5343.tfrec
Example of candidate_files: gs://spotify-tfrecords-blog/candidate_tracks_v2/spotify_000000-12227.tfrec


### parsing function - train data

In [110]:
# =========================
# Training Data TF Reocrds
# =========================

train_context_features = {
    # playlist - context features
    'name': tf.io.FixedLenFeature(dtype=tf.string, shape=[]),
    'collaborative': tf.io.FixedLenFeature(dtype=tf.string, shape=[]),
    'n_songs_pl': tf.io.FixedLenFeature(dtype=tf.float32, shape=[]),
    'num_artists_pl': tf.io.FixedLenFeature(dtype=tf.float32, shape=[]),
    'num_albums_pl': tf.io.FixedLenFeature(dtype=tf.float32, shape=[]),
    'description_pl': tf.io.FixedLenFeature(dtype=tf.string, shape=[]),
    # seed track - context features
    'track_name_seed_track': tf.io.FixedLenFeature(dtype=tf.string, shape=[]),
    'artist_name_seed_track': tf.io.FixedLenFeature(dtype=tf.string, shape=[]),
    'album_name_seed_track': tf.io.FixedLenFeature(dtype=tf.string, shape=[]),
    'duration_seed_track': tf.io.FixedLenFeature(dtype=tf.float32, shape=[]),
    'track_pop_seed_track': tf.io.FixedLenFeature(dtype=tf.float32, shape=[]),
    'artist_pop_seed_track': tf.io.FixedLenFeature(dtype=tf.float32, shape=[]),
    'artist_genres_seed_track': tf.io.FixedLenFeature(dtype=tf.string, shape=[]),
    'artist_followers_seed_track': tf.io.FixedLenFeature(dtype=tf.float32, shape=[]),
    # candidate - context features
    'track_name_can': tf.io.FixedLenFeature(dtype=tf.string, shape=[]),
    'artist_name_can': tf.io.FixedLenFeature(dtype=tf.string, shape=[]),
    'album_name_can': tf.io.FixedLenFeature(dtype=tf.string, shape=[]),
    'track_uri_can': tf.io.FixedLenFeature(dtype=tf.string, shape=[]),
    'duration_ms_can': tf.io.FixedLenFeature(dtype=tf.float32, shape=[]),
    'track_pop_can': tf.io.FixedLenFeature(dtype=tf.float32, shape=[]),
    'artist_pop_can': tf.io.FixedLenFeature(dtype=tf.float32, shape=[]),
    'artist_genres_can': tf.io.FixedLenFeature(dtype=tf.string, shape=[]),
    'artist_followers_can': tf.io.FixedLenFeature(dtype=tf.float32, shape=[]),
}

# Sequence tracks - ragged
train_sequence_features = {
    'track_name_pl': tf.io.RaggedFeature(tf.string),
    'artist_name_pl': tf.io.RaggedFeature(tf.string),
    'album_name_pl': tf.io.RaggedFeature(tf.string),
    'duration_ms_songs_pl': tf.io.RaggedFeature(tf.float32),
    'artist_pop_pl': tf.io.RaggedFeature(tf.float32),
    'artists_followers_pl': tf.io.RaggedFeature(tf.float32),
    'track_pop_pl': tf.io.RaggedFeature(tf.float32),
    'artist_genres_pl': tf.io.RaggedFeature(tf.string),
}

# parse_sequence_example | parse_single_sequence_example
def parse_train_tfrecord_fn(example):
    example = tf.io.parse_single_sequence_example( 
        example, 
        sequence_features=train_sequence_features, 
        context_features=train_context_features
    )
    return example

# TF data input pipeline params
options = tf.data.Options()
options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.DATA

# parse training data - SAMPLE_FILE_TRAIN | train_files
raw_train_ds = tf.data.TFRecordDataset(train_files)

parsed_train_ds = raw_train_ds.map(
    parse_train_tfrecord_fn, 
    num_parallel_calls=tf.data.AUTOTUNE
).with_options(options)

In [111]:
parsed_train_ds

<_OptionsDataset element_spec=({'album_name_can': TensorSpec(shape=(), dtype=tf.string, name=None), 'album_name_seed_track': TensorSpec(shape=(), dtype=tf.string, name=None), 'artist_followers_can': TensorSpec(shape=(), dtype=tf.float32, name=None), 'artist_followers_seed_track': TensorSpec(shape=(), dtype=tf.float32, name=None), 'artist_genres_can': TensorSpec(shape=(), dtype=tf.string, name=None), 'artist_genres_seed_track': TensorSpec(shape=(), dtype=tf.string, name=None), 'artist_name_can': TensorSpec(shape=(), dtype=tf.string, name=None), 'artist_name_seed_track': TensorSpec(shape=(), dtype=tf.string, name=None), 'artist_pop_can': TensorSpec(shape=(), dtype=tf.float32, name=None), 'artist_pop_seed_track': TensorSpec(shape=(), dtype=tf.float32, name=None), 'collaborative': TensorSpec(shape=(), dtype=tf.string, name=None), 'description_pl': TensorSpec(shape=(), dtype=tf.string, name=None), 'duration_ms_can': TensorSpec(shape=(), dtype=tf.float32, name=None), 'duration_seed_track': T

In [88]:
for _ in parsed_train_ds.batch(2).take(1):
    print(_)

({'album_name_can': <tf.Tensor: shape=(2,), dtype=string, numpy=
array([b'Zappruder Collection #2', b'Music for Thanksgiving Dinner'],
      dtype=object)>, 'album_name_seed_track': <tf.Tensor: shape=(2,), dtype=string, numpy=array([b'ZABA', b'100 World Classical Masterpieces'], dtype=object)>, 'artist_followers_can': <tf.Tensor: shape=(2,), dtype=float32, numpy=array([0., 0.], dtype=float32)>, 'artist_followers_seed_track': <tf.Tensor: shape=(2,), dtype=float32, numpy=array([2604423.,    3468.], dtype=float32)>, 'artist_genres_can': <tf.Tensor: shape=(2,), dtype=string, numpy=array([b'NONE', b'NONE'], dtype=object)>, 'artist_genres_seed_track': <tf.Tensor: shape=(2,), dtype=string, numpy=
array([b"'gauze pop', 'indietronica', 'shiver pop'",
       b"'balkan classical piano'"], dtype=object)>, 'artist_name_can': <tf.Tensor: shape=(2,), dtype=string, numpy=array([b'Modern Pleasure', b'Ferdinand Land'], dtype=object)>, 'artist_name_seed_track': <tf.Tensor: shape=(2,), dtype=string, numpy

In [85]:
for _ in parsed_train_ds.batch(2).take(1):
    print(_)

({'album_name_can': <tf.Tensor: shape=(2, 1), dtype=string, numpy=
array([[b'Zappruder Collection #2'],
       [b'Music for Thanksgiving Dinner']], dtype=object)>, 'album_name_seed_track': <tf.Tensor: shape=(2, 1), dtype=string, numpy=
array([[b'ZABA'],
       [b'100 World Classical Masterpieces']], dtype=object)>, 'artist_followers_can': <tf.Tensor: shape=(2, 1), dtype=float32, numpy=
array([[0.],
       [0.]], dtype=float32)>, 'artist_followers_seed_track': <tf.Tensor: shape=(2, 1), dtype=float32, numpy=
array([[2604423.],
       [   3468.]], dtype=float32)>, 'artist_genres_can': <tf.Tensor: shape=(2, 1), dtype=string, numpy=
array([[b'NONE'],
       [b'NONE']], dtype=object)>, 'artist_genres_seed_track': <tf.Tensor: shape=(2, 1), dtype=string, numpy=
array([[b"'gauze pop', 'indietronica', 'shiver pop'"],
       [b"'balkan classical piano'"]], dtype=object)>, 'artist_name_can': <tf.Tensor: shape=(2, 1), dtype=string, numpy=
array([[b'Modern Pleasure'],
       [b'Ferdinand Land']], dt

In [112]:
## Ragged

MAX_SEQUENCE_LENGTH = 375

options = tf.data.Options()
options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.DATA

# function for sequence features
def return_rag_pl_tensors(context, sequence):
        a = sequence['artist_name_pl'].to_tensor(default_value='', shape=[None, 375])
        b = sequence['track_name_pl'].to_tensor(default_value='', shape=[None, 375])
        c = sequence['duration_ms_songs_pl'].to_tensor(default_value=0.0, shape=[None, 375])
        d = sequence['album_name_pl'].to_tensor(default_value='', shape=[None, 375])
        e = sequence['artist_pop_pl'].to_tensor(default_value=0.0, shape=[None, 375])
        f = sequence['artists_followers_pl'].to_tensor(default_value=0.0, shape=[None, 375])
        g = sequence['track_pop_pl'].to_tensor(default_value=0.0, shape=[None, 375])
        h = sequence['artist_genres_pl'].to_tensor(default_value='', shape=[None, 375])
        context2 = context.copy()
        context2['artist_name_pl'] = a
        context2['track_name_pl'] = b
        context2['duration_ms_songs_pl'] = c
        context2['album_name_pl'] = d
        context2['artist_pop_pl'] = e
        context2['artists_followers_pl'] = f
        context2['track_pop_pl'] = g
        context2['artist_genres_pl'] = h
        return context2
    

parsed_train_ds_2 = parsed_train_ds.map(
    return_rag_pl_tensors,
    num_parallel_calls=tf.data.AUTOTUNE
).with_options(options)

In [113]:
parsed_train_ds_2

<_OptionsDataset element_spec={'album_name_can': TensorSpec(shape=(), dtype=tf.string, name=None), 'album_name_seed_track': TensorSpec(shape=(), dtype=tf.string, name=None), 'artist_followers_can': TensorSpec(shape=(), dtype=tf.float32, name=None), 'artist_followers_seed_track': TensorSpec(shape=(), dtype=tf.float32, name=None), 'artist_genres_can': TensorSpec(shape=(), dtype=tf.string, name=None), 'artist_genres_seed_track': TensorSpec(shape=(), dtype=tf.string, name=None), 'artist_name_can': TensorSpec(shape=(), dtype=tf.string, name=None), 'artist_name_seed_track': TensorSpec(shape=(), dtype=tf.string, name=None), 'artist_pop_can': TensorSpec(shape=(), dtype=tf.float32, name=None), 'artist_pop_seed_track': TensorSpec(shape=(), dtype=tf.float32, name=None), 'collaborative': TensorSpec(shape=(), dtype=tf.string, name=None), 'description_pl': TensorSpec(shape=(), dtype=tf.string, name=None), 'duration_ms_can': TensorSpec(shape=(), dtype=tf.float32, name=None), 'duration_seed_track': Te

In [114]:
for _ in parsed_train_ds_2.batch(2).take(1):
    print(_)

{'album_name_can': <tf.Tensor: shape=(2,), dtype=string, numpy=
array([b'Zappruder Collection #2', b'Music for Thanksgiving Dinner'],
      dtype=object)>, 'album_name_seed_track': <tf.Tensor: shape=(2,), dtype=string, numpy=array([b'ZABA', b'100 World Classical Masterpieces'], dtype=object)>, 'artist_followers_can': <tf.Tensor: shape=(2,), dtype=float32, numpy=array([0., 0.], dtype=float32)>, 'artist_followers_seed_track': <tf.Tensor: shape=(2,), dtype=float32, numpy=array([2604423.,    3468.], dtype=float32)>, 'artist_genres_can': <tf.Tensor: shape=(2,), dtype=string, numpy=array([b'NONE', b'NONE'], dtype=object)>, 'artist_genres_seed_track': <tf.Tensor: shape=(2,), dtype=string, numpy=
array([b"'gauze pop', 'indietronica', 'shiver pop'",
       b"'balkan classical piano'"], dtype=object)>, 'artist_name_can': <tf.Tensor: shape=(2,), dtype=string, numpy=array([b'Modern Pleasure', b'Ferdinand Land'], dtype=object)>, 'artist_name_seed_track': <tf.Tensor: shape=(2,), dtype=string, numpy=

## Candidate features

### inspect single TF-Record

In [9]:
# feature_dict = tf.io.parse_single_example(example_batch[0], features)

# SAMPLE_FILE = "gs://spotify-tfrecords-blog/candidate_tracks_v2/spotify_000002-8473.tfrec"
# sample_dataset = tf.data.TFRecordDataset(SAMPLE_FILE)

# for raw_record in sample_dataset.take(1):
#     example = tf.train.Example()
#     example.ParseFromString(raw_record.numpy())
#     print(example)

features {
  feature {
    key: "album_name_can"
    value {
      bytes_list {
        value: "Westbrook"
      }
    }
  }
  feature {
    key: "artist_followers_can"
    value {
      float_list {
        value: 171.0
      }
    }
  }
  feature {
    key: "artist_genres_can"
    value {
      bytes_list {
        value: ""
      }
    }
  }
  feature {
    key: "artist_name_can"
    value {
      bytes_list {
        value: "Westbrook"
      }
    }
  }
  feature {
    key: "artist_pop_can"
    value {
      float_list {
        value: 0.0
      }
    }
  }
  feature {
    key: "duration_ms_can"
    value {
      float_list {
        value: 295948.0
      }
    }
  }
  feature {
    key: "track_name_can"
    value {
      bytes_list {
        value: "Run Away"
      }
    }
  }
  feature {
    key: "track_pop_can"
    value {
      float_list {
        value: 0.0
      }
    }
  }
  feature {
    key: "track_uri_can"
    value {
      bytes_list {
        value: "spotify:track:6SXo

### parsing function - candidate data

In [115]:
# =====================
# Candidate TF Records
# =====================

candidate_features = {
    #candidate features
    'track_name_can': tf.io.FixedLenFeature(dtype=tf.string, shape=()), # shape=(1)),
    'artist_name_can': tf.io.FixedLenFeature(dtype=tf.string, shape=()), # shape=(1)),
    'album_name_can': tf.io.FixedLenFeature(dtype=tf.string, shape=()), # shape=(1)),
    'track_uri_can': tf.io.FixedLenFeature(dtype=tf.string, shape=()), # shape=(1)),
    'duration_ms_can': tf.io.FixedLenFeature(dtype=tf.float32, shape=()), # shape=(1)),
    'track_pop_can': tf.io.FixedLenFeature(dtype=tf.float32, shape=()), # shape=(1)),
    'artist_pop_can': tf.io.FixedLenFeature(dtype=tf.float32, shape=()), # shape=(1)),
    'artist_genres_can': tf.io.FixedLenFeature(dtype=tf.string, shape=()), # shape=(1)),
    'artist_followers_can': tf.io.FixedLenFeature(dtype=tf.float32, shape=()), # shape=(1)),
}

def parse_candidate_tfrecord_fn(example):
    example = tf.io.parse_single_example(
        example, 
        features=candidate_features, 
    )
    return example

# TF data input pipeline params
options = tf.data.Options()
options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.DATA

# parse candidate data
raw_candidate_ds = tf.data.TFRecordDataset(candidate_files)

parsed_candidate_ds = raw_candidate_ds.map(
    parse_candidate_tfrecord_fn, 
    num_parallel_calls=tf.data.AUTOTUNE
).with_options(options)

In [116]:
parsed_candidate_ds

<_OptionsDataset element_spec={'album_name_can': TensorSpec(shape=(), dtype=tf.string, name=None), 'artist_followers_can': TensorSpec(shape=(), dtype=tf.float32, name=None), 'artist_genres_can': TensorSpec(shape=(), dtype=tf.string, name=None), 'artist_name_can': TensorSpec(shape=(), dtype=tf.string, name=None), 'artist_pop_can': TensorSpec(shape=(), dtype=tf.float32, name=None), 'duration_ms_can': TensorSpec(shape=(), dtype=tf.float32, name=None), 'track_name_can': TensorSpec(shape=(), dtype=tf.string, name=None), 'track_pop_can': TensorSpec(shape=(), dtype=tf.float32, name=None), 'track_uri_can': TensorSpec(shape=(), dtype=tf.string, name=None)}>

In [117]:
for _ in parsed_candidate_ds.batch(1).take(1):
    print(_)

{'album_name_can': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'Psalms of David Vol 3: "The Earth Is the Lord\'s"'], dtype=object)>, 'artist_followers_can': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.], dtype=float32)>, 'artist_genres_can': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b''], dtype=object)>, 'artist_name_can': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'O Peasgood'], dtype=object)>, 'artist_pop_can': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.], dtype=float32)>, 'duration_ms_can': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([175426.], dtype=float32)>, 'track_name_can': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'Psalm 146: Praise The Lord O My Soul'], dtype=object)>, 'track_pop_can': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.], dtype=float32)>, 'track_uri_can': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'spotify:track:6TnOG25AkWwC067u82Du2S'], dtype=object)>}


## Import Vocab & Stats

In [23]:
import pickle as pkl
from google.cloud import storage

BUCKET_NAME = 'spotify-v1'
FILE_PATH = 'vocabs/v1_string_vocabs'
FILE_NAME = 'string_vocabs_v1_20220705-202905.txt'
DESTINATION_FILE = 'downloaded_vocabs.txt'

client = storage.Client()

with open(f'{DESTINATION_FILE}', 'wb') as file_obj:
    client.download_blob_to_file(
        f'gs://{BUCKET_NAME}/{FILE_PATH}/{FILE_NAME}', file_obj)

    
with open(f'{DESTINATION_FILE}', 'rb') as pickle_file:
    vocab_dict_load = pkl.load(pickle_file)

In [24]:
# vocab_dict_load['unique_pids']
# vocab_dict_load

### TODO: Add stats to vocab_file

In [25]:
avg_duration_ms_seed_pl = 13000151.68
var_duration_ms_seed_pl = 133092900971233.58
vocab_dict_load['avg_duration_ms_seed_pl']=avg_duration_ms_seed_pl
vocab_dict_load['var_duration_ms_seed_pl']=var_duration_ms_seed_pl

avg_n_songs_pl = 55.21
var_n_songs_pl = 2317.54
vocab_dict_load['avg_n_songs_pl']=avg_n_songs_pl
vocab_dict_load['var_n_songs_pl']=var_n_songs_pl

avg_n_artists_pl = 30.56
var_n_artists_pl = 769.26
vocab_dict_load['avg_n_artists_pl']=avg_n_artists_pl
vocab_dict_load['var_n_artists_pl']=var_n_artists_pl

avg_n_albums_pl = 40.25
var_n_albums_pl = 1305.54
vocab_dict_load['avg_n_albums_pl']=avg_n_albums_pl
vocab_dict_load['var_n_albums_pl']=var_n_albums_pl

avg_artist_pop = 16.08
var_artist_pop = 300.64
vocab_dict_load['avg_artist_pop']=avg_artist_pop
vocab_dict_load['var_artist_pop']=var_artist_pop

avg_duration_ms_songs_pl = 234823.14
var_duration_ms_songs_pl = 5558806228.41
vocab_dict_load['avg_duration_ms_songs_pl']=avg_duration_ms_songs_pl
vocab_dict_load['var_duration_ms_songs_pl']=var_duration_ms_songs_pl

avg_artist_followers = 43337.77
var_artist_followers = 377777790193.57
vocab_dict_load['avg_artist_followers']=avg_artist_followers
vocab_dict_load['var_artist_followers']=var_artist_followers

avg_track_pop = 10.85
var_track_pop = 202.18
vocab_dict_load['avg_track_pop']=avg_track_pop
vocab_dict_load['var_track_pop']=var_track_pop

# Two-Tower

In [102]:
# parsed_train_ds

for _ in parsed_train_ds_2.batch(1).take(1):
    print(_)

{'album_name_can': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'Zappruder Collection #2'], dtype=object)>, 'album_name_seed_track': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'ZABA'], dtype=object)>, 'artist_followers_can': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.], dtype=float32)>, 'artist_followers_seed_track': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([2604423.], dtype=float32)>, 'artist_genres_can': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'NONE'], dtype=object)>, 'artist_genres_seed_track': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b"'gauze pop', 'indietronica', 'shiver pop'"], dtype=object)>, 'artist_name_can': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'Modern Pleasure'], dtype=object)>, 'artist_name_seed_track': <tf.Tensor: shape=(1,), dtype=string, numpy=array([b'Glass Animals'], dtype=object)>, 'artist_pop_can': <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.], dtype=float32)>, 'artist_pop_seed_track'

In [105]:
test_instance = {
    'name': np.asarray([b'Best Christmas']),
    'collaborative': np.asarray([b'false']),
    'description_pl': np.asarray([b'test description']),
    'n_songs_pl': np.asarray([58.]),
    'num_artists_pl': np.asarray([19.]),
    'num_albums_pl': np.asarray([27.]),
    'artist_name_pl': np.asarray([b'Juan Luis Guerra 4.40', b'Prince Royce', b'Luis Vargas']), # b'Prince Royce', b'Luis Vargas']]),
    # 'track_uri_pl': np.asarray([b'spotify:track:1g0IBPZTRP7VYkctJ4Qafg','','']), # ,b'spotify:track:43wUzbYxEFoXugYkgTzMWp']]),
    'track_name_pl': np.asarray([b'Lover Come Back','','']), # , b'White Lightning', b'Shake Me Down']]),
    'duration_ms_songs_pl': np.asarray([245888., 0., 0.]), # , 195709., 283906., 271475., 300373., 275173., 236145.,]]),
    'album_name_pl': np.asarray([b'Silsulim','','']), # , b'Sara Shara', b'Muzika Vesheket', b'Ba La Lirkod']]),
    'artist_pop_pl': np.asarray([81., 0., 0.]), # , 81., 70., 66., 66., 66., 46., 87.]]),
    'artists_followers_pl': np.asarray([3.556710e+05, 0., 0.]), # , 8.200000e+02, 1.510000e+02, 1.098080e+05,]]),
    'artist_genres_pl': np.asarray([b"'israeli pop'", b'Juan Luis Guerra 4.40', b'Juan Luis Guerra 4.40',]), # , 'jewish pop'", b"'israeli pop', 'jewish pop'",]]),
    'track_pop_pl': np.asarray([19.0, 0.0, 0.0]), # , 2.0, 19.0, 26.0, 8.0, 28.0, 3.0, 8.0, 36.0, 9.0, 1.0, 20.0,]]),
    # seed track features
    'artist_name_seed_track': np.asarray([b'The Shadowboxers']),
    'track_name_seed_track': np.asarray([b'Ba La Lirkod']),
    'album_name_seed_track': np.asarray([b'Kaththi (Original Motion Picture Soundtrack)']),
    'duration_seed_track': np.asarray([291002.0]),
    'track_pop_seed_track': np.asarray([27.0]),
    'artist_pop_seed_track': np.asarray([51.0]),
    'artist_followers_seed_track':np.asarray([29.0]),
    'artist_genres_seed_track': np.asarray([b"'neon pop punk'"]), #, 'pop punk'"]),
}

from pprint import pprint
pprint(test_instance)

{'album_name_pl': array(['Silsulim', '', ''], dtype='<U8'),
 'album_name_seed_track': array([b'Kaththi (Original Motion Picture Soundtrack)'], dtype='|S44'),
 'artist_followers_seed_track': array([29.]),
 'artist_genres_pl': array([b"'israeli pop'", b'Juan Luis Guerra 4.40',
       b'Juan Luis Guerra 4.40'], dtype='|S21'),
 'artist_genres_seed_track': array([b"'neon pop punk'"], dtype='|S15'),
 'artist_name_pl': array([b'Juan Luis Guerra 4.40', b'Prince Royce', b'Luis Vargas'],
      dtype='|S21'),
 'artist_name_seed_track': array([b'The Shadowboxers'], dtype='|S16'),
 'artist_pop_pl': array([81.,  0.,  0.]),
 'artist_pop_seed_track': array([51.]),
 'artists_followers_pl': array([355671.,      0.,      0.]),
 'collaborative': array([b'false'], dtype='|S5'),
 'description_pl': array([b'test description'], dtype='|S16'),
 'duration_ms_songs_pl': array([245888.,      0.,      0.]),
 'duration_seed_track': array([291002.]),
 'n_songs_pl': array([58.]),
 'name': array([b'Best Christmas'], d

## Query Tower

In [106]:
EMBEDDING_DIM = 32
RNN_UNITS = 256
PROJECTION_DIM = 5
SEED = 1234
USE_CROSS_LAYER=True
DROPOUT='False'
DROPOUT_RATE='0.33'

class QueryModel(tf.keras.Model):
    def __init__(self, layer_sizes, vocab_dict):
        super().__init__()

        # ========================================
        # non-sequence playlist features
        # ========================================
        
        # Feature: playlist name
        self.pl_name_text_embedding = tf.keras.Sequential(
            [
                tf.keras.layers.TextVectorization(
                    # max_tokens=len(vocab_dict["name"]), # not needed if passing vocab
                    vocabulary=vocab_dict['name'], 
                    name="pl_name_txt_vectorizer", 
                    ngrams=2
                ),
                tf.keras.layers.Embedding(
                    input_dim=len(vocab_dict["name"]),
                    output_dim=EMBEDDING_DIM,
                    mask_zero=False,
                    name="pl_name_emb_layer",
                ),
                tf.keras.layers.GlobalAveragePooling1D(name="pl_name_pooling"),
            ], name="pl_name_emb_model"
        )
        
        # Feature: collaborative
        collaborative_vocab = np.array([b'false', b'true'])
        
        self.pl_collaborative_embedding = tf.keras.Sequential(
            [
                tf.keras.layers.StringLookup(
                    vocabulary=collaborative_vocab, 
                    mask_token=None, 
                    name="pl_collaborative_lookup", 
                    output_mode='int'
                ),
                tf.keras.layers.Embedding(
                    input_dim=len(collaborative_vocab),
                    output_dim=EMBEDDING_DIM,
                    mask_zero=False,
                    name="pl_collaborative_emb_layer",
                ),
            ], name="pl_collaborative_emb_model"
        )
        
        # Feature: description_pl
        self.pl_description_text_embedding = tf.keras.Sequential(
            [
                tf.keras.layers.TextVectorization(
                    # max_tokens=len(vocab_dict["description_pl"]), # not needed if passing vocab
                    vocabulary=vocab_dict['description_pl'], 
                    name="description_pl_vectorizer", 
                    ngrams=2,
                ),
                tf.keras.layers.Embedding(
                    input_dim=len(vocab_dict["description_pl"]),
                    output_dim=EMBEDDING_DIM,
                    mask_zero=False,
                    name="description_pl_emb_layer",
                ),
                tf.keras.layers.GlobalAveragePooling1D(name="description_pl_pooling"),
            ], name="pl_description_emb_model"
        )
        
        # Feature: duration_ms_seed_pl                      
        # TODO: Noramlize or Descritize?
        duration_ms_seed_pl_buckets = np.linspace(
            vocab_dict['min_duration_ms_seed_pl'], 
            vocab_dict['max_duration_ms_seed_pl'], 
            num=1000
        )
        self.duration_ms_seed_pl_embedding = tf.keras.Sequential(
            [
                tf.keras.layers.Discretization(duration_ms_seed_pl_buckets.tolist()),
                tf.keras.layers.Embedding(
                    input_dim=len(duration_ms_seed_pl_buckets) + 1, 
                    output_dim=EMBEDDING_DIM, 
                    name="duration_ms_seed_pl_emb_layer",
                )
            ], name="duration_ms_seed_pl_emb_model"
        )
        # self.duration_ms_seed_pl_normalization = tf.keras.layers.Normalization(
        #     mean=vocab_dict['avg_duration_ms_seed_pl'],
        #     variance=vocab_dict['var_duration_ms_seed_pl'],
        #     axis=None
        # )
        
        # Feature: n_songs_pl
        # TODO: Noramlize or Descritize?
        n_songs_pl_buckets = np.linspace(
            vocab_dict['min_n_songs_pl'], 
            vocab_dict['max_n_songs_pl'], 
            num=100
        )
        self.n_songs_pl_embedding = tf.keras.Sequential(
            [
                tf.keras.layers.Discretization(n_songs_pl_buckets.tolist()),
                tf.keras.layers.Embedding(
                    input_dim=len(n_songs_pl_buckets) + 1, 
                    output_dim=EMBEDDING_DIM, 
                    name="n_songs_pl_emb_layer",
                )
            ], name="n_songs_pl_emb_model"
        )
        # self.n_songs_pl_normalization = tf.keras.layers.Normalization(
        #     mean=vocab_dict['avg_n_songs_pl'],
        #     variance=vocab_dict['var_n_songs_pl'],
        #     axis=None
        # )
        
        # Feature: num_artists_pl
        # TODO: Noramlize or Descritize?
        n_artists_pl_buckets = np.linspace(
            vocab_dict['min_n_artists_pl'], 
            vocab_dict['max_n_artists_pl'], 
            num=100
        )
        self.n_artists_pl_embedding = tf.keras.Sequential(
            [
                tf.keras.layers.Discretization(n_artists_pl_buckets.tolist()),
                tf.keras.layers.Embedding(
                    input_dim=len(n_artists_pl_buckets) + 1, 
                    output_dim=EMBEDDING_DIM, 
                    name="n_artists_pl_emb_layer",
                )
            ], name="n_artists_pl_emb_model"
        )
        # self.n_artists_pl_normalization = tf.keras.layers.Normalization(
        #     mean=vocab_dict['avg_n_artists_pl'],
        #     variance=vocab_dict['var_n_artists_pl'],
        #     axis=None
        # )
        
        # Feature: num_albums_pl
        n_albums_pl_buckets = np.linspace(
            vocab_dict['min_n_albums_pl'], 
            vocab_dict['max_n_albums_pl'],
            num=100
        )
        self.n_albums_pl_embedding = tf.keras.Sequential(
            [
                tf.keras.layers.Discretization(n_albums_pl_buckets.tolist()),
                tf.keras.layers.Embedding(
                    input_dim=len(n_albums_pl_buckets) + 1, 
                    output_dim=EMBEDDING_DIM, 
                    name="n_albums_pl_emb_layer",
                )
            ], name="n_albums_pl_emb_model"
        )
        # self.n_albums_pl_normalization = tf.keras.layers.Normalization(
        #     mean=vocab_dict['avg_n_albums_pl'],
        #     variance=vocab_dict['var_n_albums_pl'],
        #     axis=None
        # )
        
        # ========================================
        # sequence playlist features
        # ========================================
        
        # Feature: artist_name_pl
        self.artist_name_pl_embedding = tf.keras.Sequential(
            [
                tf.keras.layers.StringLookup(
                    vocabulary=vocab_dict['artist_name_pl'],
                    name="artist_name_pl_lookup",
                    mask_token='',
                ),
                tf.keras.layers.Embedding(
                    input_dim=len(vocab_dict['artist_name_pl']) + 1, 
                    output_dim=EMBEDDING_DIM,
                    name="artist_name_pl_emb_layer",
                ),
                tf.keras.layers.GRU(EMBEDDING_DIM),
            ], name="artist_name_pl_emb_model"
        )
        
        # # Feature: track_uri_pl
        # # 2.2M unique
        # self.track_uri_pl_embedding = tf.keras.Sequential(
        #     [
        #         tf.keras.layers.Hashing(num_bins=200_000),
        #         tf.keras.layers.Embedding(
        #             input_dim=len(vocab_dict['track_uri_pl']) + 1, 
        #             output_dim=EMBEDDING_DIM,
        #             name="track_uri_pl_emb_layer",
        #         ),
        #         tf.keras.layers.GRU(EMBEDDING_DIM),
        #     ], name="track_uri_pl_emb_model"
        # )
        
        # Feature: track_name_pl
        self.track_name_pl_embedding = tf.keras.Sequential(
            [
                tf.keras.layers.StringLookup(
                    vocabulary=vocab_dict['track_name_pl'], 
                    name="track_name_pl_lookup",
                    output_mode='int',
                    mask_token='',
                ),
                tf.keras.layers.Embedding(
                    input_dim=len(vocab_dict['track_name_pl']), 
                    output_dim=EMBEDDING_DIM,
                    name="track_name_pl_emb_layer",
                ),
                tf.keras.layers.GRU(EMBEDDING_DIM),
            ], name="track_name_pl_emb_model"
        )
        
        # Feature: duration_ms_songs_pl
        duration_ms_songs_pl_buckets = np.linspace(
            vocab_dict['min_duration_ms_songs_pl'], 
            vocab_dict['max_duration_ms_songs_pl'], 
            num=100
        )
        self.duration_ms_songs_pl_embedding = tf.keras.Sequential(
            [
                tf.keras.layers.Discretization(duration_ms_songs_pl_buckets.tolist()),
                tf.keras.layers.Embedding(
                    input_dim=len(duration_ms_songs_pl_buckets) + 1, 
                    output_dim=EMBEDDING_DIM,
                    name="duration_ms_songs_pl_emb_layer",
                ),
                tf.keras.layers.GRU(EMBEDDING_DIM),
            ], name="duration_ms_songs_pl_emb_model"
        )
        
        # Feature: album_name_pl
        self.album_name_pl_embedding = tf.keras.Sequential(
            [
                tf.keras.layers.StringLookup(
                    vocabulary=vocab_dict['album_name_pl'], 
                    mask_token='',
                ),
                tf.keras.layers.Embedding(
                    input_dim=len(vocab_dict['album_name_pl']), 
                    output_dim=EMBEDDING_DIM,
                    name="album_name_pl_emb_layer",
                ),
                tf.keras.layers.GRU(EMBEDDING_DIM),
            ], name="album_name_pl_emb_model"
        )

        # Feature: artist_pop_pl
        artist_pop_pl_buckets = np.linspace(
            vocab_dict['min_artist_pop'], 
            vocab_dict['max_artist_pop'], 
            num=10
        )
        self.artist_pop_pl_embedding = tf.keras.Sequential(
            [
                tf.keras.layers.Discretization(artist_pop_pl_buckets.tolist()),
                tf.keras.layers.Embedding(
                    input_dim=len(artist_pop_pl_buckets) + 1, 
                    output_dim=EMBEDDING_DIM,
                    name="artist_pop_pl_emb_layer",
                ),
                tf.keras.layers.GRU(EMBEDDING_DIM),
            ], name="artist_pop_pl_emb_model"
        )
        
        # Feature: artists_followers_pl
        artists_followers_pl_buckets = np.linspace(
            vocab_dict['min_artist_followers'], 
            vocab_dict['max_artist_followers'], 
            num=10
        )
        self.artists_followers_pl_embedding = tf.keras.Sequential(
            [
                tf.keras.layers.Discretization(artists_followers_pl_buckets.tolist()),
                tf.keras.layers.Embedding(
                    input_dim=len(artists_followers_pl_buckets) + 1, 
                    output_dim=EMBEDDING_DIM,
                    name="artists_followers_pl_emb_layer",
                ),
                tf.keras.layers.GRU(EMBEDDING_DIM),
            ], name="artists_followers_pl_emb_model"
        )
        
        # Feature: track_pop_pl
        track_pop_pl_buckets = np.linspace(
            vocab_dict['min_track_pop'], 
            vocab_dict['max_track_pop'], 
            num=10
        )
        self.track_pop_pl_embedding = tf.keras.Sequential(
            [
                tf.keras.layers.Discretization(track_pop_pl_buckets.tolist()),
                tf.keras.layers.Embedding(
                    input_dim=len(track_pop_pl_buckets) + 1, 
                    output_dim=EMBEDDING_DIM,
                    name="track_pop_pl_emb_layer",
                ),
                tf.keras.layers.GRU(EMBEDDING_DIM),
            ], name="track_pop_pl_emb_model"
        )
        
        # Feature: artist_genres_pl
        self.artist_genres_pl_embedding = tf.keras.Sequential(
            [
                tf.keras.layers.StringLookup(
                    vocabulary=vocab_dict['artist_genres_pl'],
                    mask_token='',
                ),
                tf.keras.layers.Embedding(
                    input_dim=len(vocab_dict['artist_genres_pl']), 
                    output_dim=EMBEDDING_DIM,
                    name="artist_genres_pl_emb_layer",
                ),
                tf.keras.layers.GRU(EMBEDDING_DIM),
            ], name="artist_genres_pl_emb_model"
        )
        
        # ========================================
        # seed track features
        # ========================================
        
        # Feature: artist_name_seed_track
        self.artist_name_seed_track_text_embedding = tf.keras.Sequential(
            [
                tf.keras.layers.TextVectorization(
                    vocabulary=vocab_dict["artist_name_seed_track"],
                    name="artist_name_seed_track_txt_vectorizer",
                    ngrams=2,
                ),
                tf.keras.layers.Embedding(
                    input_dim=len(vocab_dict["artist_name_seed_track"]),
                    output_dim=EMBEDDING_DIM,
                    mask_zero=False,
                    name="artist_name_seed_track_emb_layer",
                ),
                tf.keras.layers.GlobalAveragePooling1D(name="artist_name_seed_track_pooling"),
            ], name="artist_name_seed_track_emb_model"
        )
        
        # Feature: track_name_seed_track
        self.track_name_seed_track_text_embedding = tf.keras.Sequential(
            [
                tf.keras.layers.TextVectorization(
                    vocabulary=vocab_dict["track_name_seed_track"],
                    name="track_name_seed_track_txt_vectorizer",
                    ngrams=2,
                ),
                tf.keras.layers.Embedding(
                    input_dim=len(vocab_dict["track_name_seed_track"]),
                    output_dim=EMBEDDING_DIM,
                    mask_zero=False,
                    name="track_name_seed_track_emb_layer",
                ),
                tf.keras.layers.GlobalAveragePooling1D(name="track_name_seed_track_pooling"),
            ], name="track_name_seed_track_emb_model"
        )

        
        # Feature: album_name_seed_track
        self.album_name_seed_track_text_embedding = tf.keras.Sequential(
            [
                tf.keras.layers.TextVectorization(
                    vocabulary=vocab_dict["album_name_seed_track"],
                    name="album_name_seed_track_txt_vectorizer",
                    ngrams=2,
                ),
                tf.keras.layers.Embedding(
                    input_dim=len(vocab_dict["album_name_seed_track"]),
                    output_dim=EMBEDDING_DIM,
                    mask_zero=False,
                    name="album_name_seed_track_emb_layer",
                ),
                tf.keras.layers.GlobalAveragePooling1D(name="album_name_seed_track_pooling"),
            ], name="album_name_seed_track_emb_model"
        )
        
        # Feature: duration_seed_track
        self.duration_seed_track_normalized = tf.keras.layers.Normalization(
            mean=vocab_dict['avg_duration_ms_songs_pl'],
            variance=vocab_dict['var_duration_ms_songs_pl'],
            axis=None
        )
        
        # Feature: track_pop_seed_track
        self.track_pop_seed_track_normalized = tf.keras.layers.Normalization(
            mean=vocab_dict['avg_track_pop'],
            variance=vocab_dict['var_track_pop'],
            axis=None
        )
        
        # Feature: artist_pop_seed_track
        self.artist_pop_seed_track_normalized = tf.keras.layers.Normalization(
            mean=vocab_dict['avg_artist_pop'],
            variance=vocab_dict['var_artist_pop'],
            axis=None
        )
        
        # Feature: artist_followers_seed_track
        self.artist_followers_seed_track_normalized = tf.keras.layers.Normalization(
            mean=vocab_dict['avg_artist_followers'],
            variance=vocab_dict['var_artist_followers'],
            axis=None
        )
        
        # Feature: artist_genres_seed_track
        self.artist_genres_seed_track_text_embedding = tf.keras.Sequential(
            [
                tf.keras.layers.TextVectorization(
                    vocabulary=vocab_dict["artist_genres_seed_track"],
                    name="artist_genres_seed_track_txt_vectorizer",
                    ngrams=2,
                ),
                tf.keras.layers.Embedding(
                    input_dim=len(vocab_dict["artist_genres_seed_track"]),
                    output_dim=EMBEDDING_DIM,
                    mask_zero=False,
                    name="artist_genres_seed_track_emb_layer",
                ),
                tf.keras.layers.GlobalAveragePooling1D(name="artist_genres_seed_track_pooling"),
            ], name="artist_genres_seed_track_emb_model"
        )

        # ========================================
        # dense and cross layers
        # ========================================

        # Cross Layers
        if USE_CROSS_LAYER:
            self._cross_layer = tfrs.layers.dcn.Cross(
                projection_dim=PROJECTION_DIM,
                kernel_initializer="glorot_uniform", 
                name="pl_cross_layer"
            )
        else:
            self._cross_layer = None
            
        # Dense Layers
        self.dense_layers = tf.keras.Sequential(name="pl_dense_layers")
        initializer = tf.keras.initializers.GlorotUniform(seed=SEED)
        
        # Use the ReLU activation for all but the last layer.
        for layer_size in layer_sizes[:-1]:
            self.dense_layers.add(
                tf.keras.layers.Dense(
                    layer_size, 
                    activation="relu", 
                    kernel_initializer=initializer,
                )
            )
            if DROPOUT:
                self.dense_layers.add(tf.keras.layers.Dropout(DROPOUT_RATE))
                
        # No activation for the last layer
        for layer_size in layer_sizes[-1:]:
            self.dense_layers.add(
                tf.keras.layers.Dense(
                    layer_size, 
                    kernel_initializer=initializer
                )
            )
        ### ADDING L2 NORM AT THE END
        self.dense_layers.add(
            tf.keras.layers.Lambda(
                lambda x: tf.nn.l2_normalize(
                    x, 1, epsilon=1e-12, name="normalize_dense"
                )
            )
        )
    # ========================================
    # call
    # ========================================
    def call(self, data):
        '''
        The call method defines what happens when
        the model is called
        '''
        
        all_embs = tf.concat(
            [
                self.pl_name_text_embedding(data['name']),
                self.pl_collaborative_embedding(data['collaborative']),
                self.pl_description_text_embedding(data['description_pl']),
                self.n_songs_pl_embedding(data["n_songs_pl"]),
                # tf.reshape(self.n_songs_pl_normalization(data["n_songs_pl"]), (-1, 1))                        # Normalize or Discretize?
                self.n_artists_pl_embedding(data['num_artists_pl']),
                # tf.reshape(self.n_artists_pl_normalization(data["num_artists_pl"]), (-1, 1))                  # Normalize or Discretize?
                self.n_albums_pl_embedding(data["num_albums_pl"]),
                # tf.reshape(self.n_albums_pl_normalization(data["num_albums_pl"]), (-1, 1))                    # Normalize or Discretize?
                # sequence features
                self.artist_name_pl_embedding(data["artist_name_pl"]),
                self.track_uri_pl_embedding(data["track_uri_pl"]),
                self.track_name_pl_embedding(data["track_name_pl"]),
                self.duration_ms_songs_pl_embedding(data["duration_ms_songs_pl"]),
                self.album_name_pl_embedding(data["album_name_pl"]),
                self.artist_pop_pl_embedding(data["artist_pop_pl"]),
                self.artists_followers_pl_embedding(data["artists_followers_pl"]),
                self.track_pop_pl_embedding(data["track_pop_pl"]),
                self.artist_genres_pl_embedding(data["artist_genres_pl"]),
                # seed track
                self.artist_name_seed_track_text_embedding(data['artist_name_seed_track']),
                self.track_name_seed_track_text_embedding(data['track_name_seed_track']),
                self.album_name_seed_track_text_embedding(data["album_name_seed_track"]),
                # tf.reshape(self.duration_seed_track_normalized(data["duration_seed_track"]), (-1, 1)),
                # tf.reshape(self.track_pop_seed_track_normalized(data["track_pop_seed_track"]), (-1, 1)),
                # tf.reshape(self.artist_pop_seed_track_normalized(data["artist_pop_seed_track"]), (-1, 1)),
                # tf.reshape(self.artist_followers_seed_track_normalized(data["artist_followers_seed_track"]), (-1, 1)),
                self.duration_seed_track_normalized(data["duration_seed_track"]),
                self.track_pop_seed_track_normalized(data["track_pop_seed_track"]),
                self.artist_pop_seed_track_normalized(data["artist_pop_seed_track"]),
                self.artist_followers_seed_track_normalized(data["artist_followers_seed_track"]),
                self.artist_genres_seed_track_text_embedding(data["artist_genres_seed_track"]),
            ], axis=1)
        
        # Build Cross Network
        if self._cross_layer is not None:
            cross_embs = self._cross_layer(all_embs)
            return self.dense_layers(cross_embs)
        else:
            return self.dense_layers(all_embs)

In [107]:
layer_sizes=[64,32]

test_query_model = QueryModel(layer_sizes,vocab_dict_load)

pl_result = test_query_model(test_instance)

print(f"Shape of pl_result: {pl_result.shape}")
pl_result

TypeError: Exception encountered when calling layer "artist_name_pl_emb_model" (type Sequential).

call() missing 1 required positional argument: 'states'

Call arguments received by layer "artist_name_pl_emb_model" (type Sequential):
  • inputs=tf.Tensor(shape=(3,), dtype=string)
  • training=False
  • mask=None

In [80]:
test_query_model.summary(expand_nested=True)

Model: "query_model_9"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 pl_name_emb_model (Sequenti  (None, 32)               2368896   
 al)                                                             
|¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯|
| pl_name_txt_vectorizer (Tex  (None, None)           0         |
| tVectorization)                                               |
|                                                               |
| pl_name_emb_layer (Embeddin  (None, None, 32)       2368896   |
| g)                                                            |
|                                                               |
| pl_name_pooling (GlobalAver  (None, 32)             0         |
| agePooling1D)                                                 |
¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
 pl_collaborative_emb_model   (1, 32)                

ValueError: Weights for model duration_ms_seed_pl_emb_model have not yet been created. Weights are created when the Model is first called on inputs or `build()` is called with an `input_shape`.

## Candidate Tower

In [46]:
# for x in parsed_candidate_ds.take(1).as_numpy_iterator():
#     pprint(x['artist_genres'])

In [40]:
can_test_instance = {
    'artist_name_can': np.asarray([b'Fabrizio Paterlini']),
    'track_name_can': np.asarray([b"Controvento, senz'olio"]),
    'album_name_can' : np.asarray([b'Fragments Found']),
    'duration_ms_can' : np.asarray([222747]),
    'track_pop_can' : np.asarray([28.0]),
    'artist_pop_can' : np.asarray([69.0]),
    'artist_followers_can' : np.asarray([90784.0]),
    'artist_genres_can' : np.asarray([b"'compositional ambient', 'neo-classical'"]),
}
pprint(can_test_instance)

{'album_name_can': array([b'Fragments Found'], dtype='|S15'),
 'artist_followers_can': array([90784.]),
 'artist_genres_can': array([b"'compositional ambient', 'neo-classical'"], dtype='|S40'),
 'artist_name_can': array([b'Fabrizio Paterlini'], dtype='|S18'),
 'artist_pop_can': array([69.]),
 'duration_ms_can': array([222747]),
 'track_name_can': array([b"Controvento, senz'olio"], dtype='|S22'),
 'track_pop_can': array([28.])}


In [42]:
EMBEDDING_DIM = 32
PROJECTION_DIM = 5
SEED = 1234
USE_CROSS_LAYER=True
DROPOUT='False'
DROPOUT_RATE='0.33'

class CandidateModel(tf.keras.Model):
    def __init__(self, layer_sizes, vocab_dict):
        super().__init__()
        
        # ========================================
        # Candidate features
        # ========================================
        
        # Feature: artist_name_can
        self.artist_name_text_embedding = tf.keras.Sequential(
            [
                tf.keras.layers.TextVectorization(
                    vocabulary=vocab_dict["artist_name_can"],
                    name="artist_name_txt_vectorizer",
                    ngrams=2,
                ),
                tf.keras.layers.Embedding(
                    input_dim=len(vocab_dict["artist_name_can"]),
                    output_dim=EMBEDDING_DIM,
                    mask_zero=False,
                    name="artist_name_emb_layer",
                ),
                tf.keras.layers.GlobalAveragePooling1D(name="artist_name_pooling"),
            ], name="artist_name_emb_model"
        )

        # Feature: track_name_can
        self.track_name_text_embedding = tf.keras.Sequential(
            [
                tf.keras.layers.TextVectorization(
                    vocabulary=vocab_dict["track_name_can"],
                    name="track_name_txt_vectorizer",
                    ngrams=2,
                ),
                tf.keras.layers.Embedding(
                    input_dim=len(vocab_dict["track_name_can"]),
                    output_dim=EMBEDDING_DIM,
                    mask_zero=False,
                    name="track_name_emb_layer",
                ),
                tf.keras.layers.GlobalAveragePooling1D(name="track_name_pooling"),
            ], name="track_name_emb_model"
        )
        
        # Feature: album_name_can
        self.album_name_text_embedding = tf.keras.Sequential(
            [
                tf.keras.layers.TextVectorization(
                    vocabulary=vocab_dict["album_name_can"],
                    name="album_name_txt_vectorizer",
                    ngrams=2,
                ),
                tf.keras.layers.Embedding(
                    input_dim=len(vocab_dict["album_name_can"]),
                    output_dim=EMBEDDING_DIM,
                    mask_zero=False,
                    name="album_name_emb_layer",
                ),
                tf.keras.layers.GlobalAveragePooling1D(name="album_name_pooling"),
            ], name="album_name_emb_model"
        )
        
        # Feature: duration_ms_can
        self.duration_ms_normalized = tf.keras.layers.Normalization(
            mean=vocab_dict['avg_duration_ms_songs_pl'],
            variance=vocab_dict['var_duration_ms_songs_pl'],
            axis=None
        )
        
        # Feature: track_pop_can
        self.track_pop_normalized = tf.keras.layers.Normalization(
            mean=vocab_dict['avg_track_pop'],
            variance=vocab_dict['var_track_pop'],
            axis=None
        )
        
        # Feature: artist_pop_can
        self.artist_pop_normalized = tf.keras.layers.Normalization(
            mean=vocab_dict['avg_artist_pop'],
            variance=vocab_dict['var_artist_pop'],
            axis=None
        )
        
        # Feature: artist_followers_can
        self.artist_followers_normalized = tf.keras.layers.Normalization(
            mean=vocab_dict['avg_artist_followers'],
            variance=vocab_dict['var_artist_followers'],
            axis=None
        )
        
        # Feature: artist_genres_can
        self.artist_genres_text_embedding = tf.keras.Sequential(
            [
                tf.keras.layers.TextVectorization(
                    vocabulary=vocab_dict["artist_genres_can"],
                    name="artist_genres_txt_vectorizer",
                    ngrams=2,
                ),
                tf.keras.layers.Embedding(
                    input_dim=len(vocab_dict["artist_genres_can"]),
                    output_dim=EMBEDDING_DIM,
                    mask_zero=False,
                    name="artist_genres_emb_layer",
                ),
                tf.keras.layers.GlobalAveragePooling1D(name="artist_genres_pooling"),
            ], name="artist_genres_emb_model"
        )
        # ========================================
        # Dense & Cross Layers
        # ========================================
        
        # Cross Layers
        
        # Dense Layer
        self.dense_layers = tf.keras.Sequential(name="candidate_dense_layers")
        initializer = tf.keras.initializers.GlorotUniform(seed=SEED)
        
        # Use the ReLU activation for all but the last layer.
        for layer_size in layer_sizes[:-1]:
            self.dense_layers.add(
                tf.keras.layers.Dense(
                    layer_size, 
                    activation="relu", 
                    kernel_initializer=initializer,
                )
            )
            if DROPOUT:
                self.dense_layers.add(tf.keras.layers.Dropout(DROPOUT_RATE))
                
        # No activation for the last layer
        for layer_size in layer_sizes[-1:]:
            self.dense_layers.add(
                tf.keras.layers.Dense(
                    layer_size, 
                    kernel_initializer=initializer
                )
            )
            
    # ========================================
    # Call Function
    # ========================================
            
    def call(self, data):
        
        all_embs = tf.concat(
            [
                self.artist_name_text_embedding(data['artist_name_can']),
                self.track_name_text_embedding(data['track_name_can']),
                self.album_name_text_embedding(data['album_name_can']),
                tf.reshape(self.duration_ms_normalized(data["duration_ms_can"]), (-1, 1)),
                tf.reshape(self.track_pop_normalized(data["track_pop_can"]), (-1, 1)),
                tf.reshape(self.artist_pop_normalized(data["artist_pop_can"]), (-1, 1)),
                tf.reshape(self.artist_followers_normalized(data["artist_followers_can"]), (-1, 1)),
                self.artist_genres_text_embedding(data['artist_genres_can']),
            ], axis=1
        )
        
        return self.dense_layers(all_embs)

In [43]:
layer_sizes=[64,32]

test_can_track_model = CandidateModel(layer_sizes, vocab_dict_load)

can_result = test_can_track_model(can_test_instance)

print(f"Shape of can_result: {can_result.shape}")
can_result

Shape of can_result: (1, 32)


<tf.Tensor: shape=(1, 32), dtype=float32, numpy=
array([[-0.09798299, -0.13597596, -0.20222108,  0.1804454 , -0.05496097,
         0.10369475, -0.24035081, -0.36623093, -0.1693298 , -0.29658014,
        -0.19157353, -0.06317501, -0.07559349, -0.2500427 ,  0.02374963,
         0.08970213,  0.03046359, -0.03684572,  0.39922062,  0.11256151,
        -0.4696306 , -0.54584545, -0.35822794, -0.17518142,  0.25293875,
         0.2434436 , -0.02671403,  0.28442368, -0.28969672,  0.00302041,
         0.02647205,  0.03832131]], dtype=float32)>

In [44]:
test_can_track_model.summary(expand_nested=True)

Model: "candidate_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 artist_name_emb_model (Sequ  (None, 32)               9206720   
 ential)                                                         
|¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯|
| artist_name_txt_vectorizer   (None, None)           0         |
| (TextVectorization)                                           |
|                                                               |
| artist_name_emb_layer (Embe  (None, None, 32)       9206720   |
| dding)                                                        |
|                                                               |
| artist_name_pooling (Global  (None, 32)             0         |
| AveragePooling1D)                                             |
¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
 track_name_emb_model (Seque  (None, 32)           

## Combined Model

* Takes `QueryModel` and `CandidateModel` inputs (deep emebddings of size ARCH[-1])

In [56]:
# %%writefile -a vertex_train/trainer/task.py

class TwoTowers(tfrs.models.Model):

    def __init__(self, layer_sizes, vocab_dict_load):
        super().__init__()
        self.query_model = QueryModel(layer_sizes, vocab_dict_load)
        self.candidate_model = CandidateModel(layer_sizes, vocab_dict_load)
        self.task = tfrs.tasks.Retrieval(
            metrics=tfrs.metrics.FactorizedTopK(
                candidates=parsed_candidate_ds.batch(128).cache().map(self.candidate_model)
            )
        )

    def compute_loss(self, data, training=False):
        query_embeddings = self.query_model(data)
        product_embeddings = self.candidate_model(data)

        return self.task(
            query_embeddings, product_embeddings, compute_metrics=not training)   #### turn off metrics to save time on training

# Model Training

### Vertex Experiments

* create an experiment using the `init()` method and then initialize a run within the experiment using `start_run()`

* `aiplatform.init()` - Create an experiment instance
* `aiplatform.start_run()` - Track a specific run within the experiment.

In [None]:
# from datetime import datetime

# TIMESTAMP = datetime.now().strftime("%Y%m%d%H%M%S")

# Specify a name for the experiment
# EXPERIMENT_NAME = "sp-towtower-baseline-v1"

# # Create experiment
# aiplatform.init(experiment=EXPERIMENT_NAME)
# aiplatform.start_run("run-1")

#### Log Paramters for the Experiment

In [None]:
# metaparams = {}
# metaparams["units"] = 128
# aiplatform.log_params(metaparams)

# hyperparams = {}
# hyperparams["epochs"] = 100
# hyperparams["batch_size"] = 32
# hyperparams["learning_rate"] = 0.01
# aiplatform.log_params(hyperparams)

#### Log metrics for the experiment

In [None]:
# metrics = {}
# metrics["test_acc"] = 98.7
# metrics["train_acc"] = 99.3
# aiplatform.log_metrics(metrics)

#### Get the experiment results

* Use the `end_run()` method to complete the logging for a run
* Use the experiment name as a parameter to the method `get_experiment_df()` to get the results of the experiment as a pandas dataframe.

```
aiplatform.end_run()
experiment_df = aiplatform.get_experiment_df()
experiment_df = experiment_df[experiment_df.experiment_name == EXPERIMENT_NAME]
experiment_df.T
```

#### Start subsequent run in an experiment

> `aiplatform.start_run("run-2")`

### TensorBoard

You can upload your TensorBoard logs by first creating a TensorBoard instance and then using the `tb-gcp-uploader` command to upload the logs. Once uploaded, the command will return a URL for connecting to the TensorBoard instance via the browser.

```
! tensorboard dev upload --logdir logs \
  --name "Simple experiment with MNIST" \
  --description "Training results" \
  --one_shot
```

Learn more with [TensorBoard overview](https://cloud.google.com/vertex-ai/docs/experiments/tensorboard-overview).

* Checkout `get_started_vertex_tensorboard` [Colab](https://colab.sandbox.google.com/github/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/ml_ops/stage2/get_started_vertex_tensorboard.ipynb#scrollTo=h-nl9TW0RvNi)

In [51]:
TENSORBOARD_DISPLAY_NAME = "sp-2tower-v1"
tensorboard = aiplatform.Tensorboard.create(display_name=TENSORBOARD_DISPLAY_NAME)
tensorboard_resource_name = tensorboard.gca_resource.name
print("TensorBoard resource name:", tensorboard_resource_name)

Creating Tensorboard
Create Tensorboard backing LRO: projects/934903580331/locations/us-central1/tensorboards/709510455357341696/operations/464217726257201152
Tensorboard created. Resource name: projects/934903580331/locations/us-central1/tensorboards/709510455357341696
To use this Tensorboard in another session:
tb = aiplatform.Tensorboard('projects/934903580331/locations/us-central1/tensorboards/709510455357341696')
TensorBoard resource name: projects/934903580331/locations/us-central1/tensorboards/709510455357341696


In [52]:
tf.random.set_seed(SEED)
import keras
import time

TB_RESOURCE_NAME = 'projects/934903580331/locations/us-central1/tensorboards/709510455357341696'
invoke_time = time.strftime("%Y%m%d_%H%M%S")

EXPERIMENT_NAME = 'run-v1'

LOG_DIR = f"gs://spotify-v1/{EXPERIMENT_NAME}/logdir"

print(f"Tensorflow logs exported to {LOG_DIR})")
print(f"""Helper for copy/past TF log upload command:

tb-gcp-uploader --tensorboard_resource_name={TB_RESOURCE_NAME} \
  --logdir={LOG_DIR} \
  --experiment_name={EXPERIMENT_NAME} --one_shot=False
""")

tensorboard_cb = keras.callbacks.TensorBoard(
    log_dir=LOG_DIR,
    histogram_freq=0,
    embeddings_freq=0,
    update_freq="epoch",
    write_graph=True,
    profile_batch = '500,520'
)

Tensorflow logs exported to gs://spotify-v1/run-v1/logdir)
Helper for copy/past TF log upload command:

tb-gcp-uploader --tensorboard_resource_name=projects/934903580331/locations/us-central1/tensorboards/709510455357341696   --logdir=gs://spotify-v1/run-v1/logdir   --experiment_name=run-v1 --one_shot=False



2022-07-12 07:51:42.651001: I tensorflow/core/profiler/lib/profiler_session.cc:99] Profiler session initializing.
2022-07-12 07:51:42.651046: I tensorflow/core/profiler/lib/profiler_session.cc:114] Profiler session started.
2022-07-12 07:51:42.652302: I tensorflow/core/profiler/lib/profiler_session.cc:126] Profiler session tear down.


### Data Prep

In [53]:
# Training Data
shuffled = parsed_train_ds.shuffle(2000, seed=SEED, reshuffle_each_iteration=False)
parsed_dataset_candidates = parsed_candidate_ds.shuffle(10000, seed=SEED, reshuffle_each_iteration=False)

strategy= tf.distribute.MirroredStrategy()

# TODO: stratified sampling for train/test split (?)
test_size = 200_000
test = shuffled.take(test_size)
train = shuffled.skip(test_size)

# Data size may be too large for in-memory caching
# cached_train = train.batch(BATCH_SIZE * strategy.num_replicas_in_sync).prefetch(tf.data.AUTOTUNE)
# cached_test = test.batch(BATCH_SIZE * strategy.num_replicas_in_sync).prefetch(tf.data.AUTOTUNE)

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:CPU:0',)


## Compile and Train Model

In [58]:
# model config
EMBEDDING_DIM = 32
PROJECTION_DIM = 5
# MAX_TOKENS = 5_000
# N_PRODUCTS = 5_000
# N_HIER = 5000
BATCH_SIZE = 128
ARCH = [64,32]
NUM_EPOCHS = 1
NUM_RECORDS = 65_346_428   # TODO: check TF-Records and train/valid samples count
TEST_PCT = 0.1
JOB_DIR = 'gs://trfs-tf-bucket' #TODO change to your directory
SEED = 1234
print(f'Batches per epoch: {NUM_RECORDS/BATCH_SIZE}')
USE_CROSS_LAYER = False
DROPOUT='False'
DROPOUT_RATE='0.33'

with strategy.scope():

    model = TwoTowers(layer_sizes=ARCH, vocab_dict_load=vocab_dict_load) # layer_sizes, vocab_dict_load

    model.compile(optimizer=tf.keras.optimizers.Adagrad(.1))

Batches per epoch: 510518.96875


### Local Training

Things to try to combat dimensions issues

* reshape train/test data to accomadate GRU layers (e.g., `x_train = x_train.reshape(-1, 1, 9)`)
* Try `GRUCell` instead of `GRU` layer; GRUCell expects 2D tensor as input `[batch, feature]`
> * `GRUCell` = processes one step within the whole time sequence input, 
> * `tf.keras.layer.GRU` processes the whole sequence; and expects 3dim
* provide an `input_length` with the `Embedding` layers (e.g., `...Embedding(1000, 64, input_length=10)`)

In [59]:
import time    

start_time = time.time()

NUM_EPOCHS = 10
layer_history = model.fit(
    train,
    validation_data=test,
    validation_freq=5,
    epochs=NUM_EPOCHS,
    callbacks=tensorboard_cb,
    verbose=1)

train_time = time.time() - start_time

print(f"Training for {NUM_EPOCHS} epoch, ran for: {train_time:.0} seconds")
accuracy = layer_history.history["factorized_top_k/top_100_categorical_accuracy"][-1]
print(f"Top 100 categorical accuracy: {accuracy}")

Epoch 1/10


2022-07-12 08:05:50.750231: W tensorflow/core/framework/dataset.cc:768] Input of GeneratorDatasetOp::Dataset will not be optimized because the dataset does not implement the AsGraphDefInternal() method needed to apply optimizations.


INFO:tensorflow:Error reported to Coordinator: Exception encountered when calling layer "query_model_3" (type QueryModel).

in user code:

    File "/tmp/ipykernel_18109/983446631.py", line 482, in call  *
        all_embs = tf.concat(

    TypeError: tuple indices must be integers or slices, not str


Call arguments received by layer "query_model_3" (type QueryModel):
  • data=({'album_name_can': 'tf.Tensor(shape=(None,), dtype=string)', 'album_name_seed_track': 'tf.Tensor(shape=(None,), dtype=string)', 'artist_followers_can': 'tf.Tensor(shape=(None,), dtype=float32)', 'artist_followers_seed_track': 'tf.Tensor(shape=(None,), dtype=float32)', 'artist_genres_can': 'tf.Tensor(shape=(None,), dtype=string)', 'artist_genres_seed_track': 'tf.Tensor(shape=(None,), dtype=string)', 'artist_name_can': 'tf.Tensor(shape=(None,), dtype=string)', 'artist_name_seed_track': 'tf.Tensor(shape=(None,), dtype=string)', 'artist_pop_can': 'tf.Tensor(shape=(None,), dtype=float32)', 'artist_pop_seed_track': '

TypeError: in user code:

    File "/opt/conda/lib/python3.7/site-packages/keras/engine/training.py", line 1051, in train_function  *
        return step_function(self, iterator)
    File "/opt/conda/lib/python3.7/site-packages/keras/engine/training.py", line 1040, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/opt/conda/lib/python3.7/site-packages/six.py", line 719, in reraise
        raise value
    File "/opt/conda/lib/python3.7/site-packages/keras/engine/training.py", line 1030, in run_step  **
        outputs = model.train_step(data)
    File "/home/jupyter/.local/lib/python3.7/site-packages/tensorflow_recommenders/models/base.py", line 68, in train_step
        loss = self.compute_loss(inputs, training=True)
    File "/tmp/ipykernel_18109/538400380.py", line 16, in compute_loss
        query_embeddings = self.query_model(data)
    File "/opt/conda/lib/python3.7/site-packages/keras/utils/traceback_utils.py", line 67, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/tmp/__autograph_generated_file_rb1v4sx.py", line 11, in tf__call
        all_embs = ag__.converted_call(ag__.ld(tf).concat, ([ag__.converted_call(ag__.ld(self).pl_name_text_embedding, (ag__.ld(data)['name'],), None, fscope), ag__.converted_call(ag__.ld(self).pl_collaborative_embedding, (ag__.ld(data)['collaborative'],), None, fscope), ag__.converted_call(ag__.ld(self).pl_description_text_embedding, (ag__.ld(data)['description_pl'],), None, fscope), ag__.converted_call(ag__.ld(self).n_songs_pl_embedding, (ag__.ld(data)['n_songs_pl'],), None, fscope), ag__.converted_call(ag__.ld(self).n_artists_pl_embedding, (ag__.ld(data)['num_artists_pl'],), None, fscope), ag__.converted_call(ag__.ld(self).n_albums_pl_embedding, (ag__.ld(data)['num_albums_pl'],), None, fscope), ag__.converted_call(ag__.ld(self).artist_name_pl_embedding, (ag__.ld(data)['artist_name_pl'],), None, fscope), ag__.converted_call(ag__.ld(self).track_uri_pl_embedding, (ag__.ld(data)['track_uri_pl'],), None, fscope), ag__.converted_call(ag__.ld(self).track_name_pl_embedding, (ag__.ld(data)['track_name_pl'],), None, fscope), ag__.converted_call(ag__.ld(self).duration_ms_songs_pl_embedding, (ag__.ld(data)['duration_ms_songs_pl'],), None, fscope), ag__.converted_call(ag__.ld(self).album_name_pl_embedding, (ag__.ld(data)['album_name_pl'],), None, fscope), ag__.converted_call(ag__.ld(self).artist_pop_pl_embedding, (ag__.ld(data)['artist_pop_pl'],), None, fscope), ag__.converted_call(ag__.ld(self).artists_followers_pl_embedding, (ag__.ld(data)['artists_followers_pl'],), None, fscope), ag__.converted_call(ag__.ld(self).track_pop_pl_embedding, (ag__.ld(data)['track_pop_pl'],), None, fscope), ag__.converted_call(ag__.ld(self).artist_genres_pl_embedding, (ag__.ld(data)['artist_genres_pl'],), None, fscope), ag__.converted_call(ag__.ld(self).artist_name_seed_track_text_embedding, (ag__.ld(data)['artist_name_seed_track'],), None, fscope), ag__.converted_call(ag__.ld(self).track_name_seed_track_text_embedding, (ag__.ld(data)['track_name_seed_track'],), None, fscope), ag__.converted_call(ag__.ld(self).album_name_seed_track_text_embedding, (ag__.ld(data)['album_name_seed_track'],), None, fscope), ag__.converted_call(ag__.ld(tf).reshape, (ag__.converted_call(ag__.ld(self).duration_seed_track_normalized, (ag__.ld(data)['duration_seed_track'],), None, fscope), ((- 1), 1)), None, fscope), ag__.converted_call(ag__.ld(tf).reshape, (ag__.converted_call(ag__.ld(self).track_pop_seed_track_normalized, (ag__.ld(data)['track_pop_seed_track'],), None, fscope), ((- 1), 1)), None, fscope), ag__.converted_call(ag__.ld(tf).reshape, (ag__.converted_call(ag__.ld(self).artist_pop_seed_track_normalized, (ag__.ld(data)['artist_pop_seed_track'],), None, fscope), ((- 1), 1)), None, fscope), ag__.converted_call(ag__.ld(tf).reshape, (ag__.converted_call(ag__.ld(self).artist_followers_seed_track_normalized, (ag__.ld(data)['artist_followers_seed_track'],), None, fscope), ((- 1), 1)), None, fscope), ag__.converted_call(ag__.ld(self).artist_genres_seed_track_text_embedding, (ag__.ld(data)['artist_genres_seed_track'],), None, fscope)],), dict(axis=1), fscope)

    TypeError: Exception encountered when calling layer "query_model_3" (type QueryModel).
    
    in user code:
    
        File "/tmp/ipykernel_18109/983446631.py", line 482, in call  *
            all_embs = tf.concat(
    
        TypeError: tuple indices must be integers or slices, not str
    
    
    Call arguments received by layer "query_model_3" (type QueryModel):
      • data=({'album_name_can': 'tf.Tensor(shape=(None,), dtype=string)', 'album_name_seed_track': 'tf.Tensor(shape=(None,), dtype=string)', 'artist_followers_can': 'tf.Tensor(shape=(None,), dtype=float32)', 'artist_followers_seed_track': 'tf.Tensor(shape=(None,), dtype=float32)', 'artist_genres_can': 'tf.Tensor(shape=(None,), dtype=string)', 'artist_genres_seed_track': 'tf.Tensor(shape=(None,), dtype=string)', 'artist_name_can': 'tf.Tensor(shape=(None,), dtype=string)', 'artist_name_seed_track': 'tf.Tensor(shape=(None,), dtype=string)', 'artist_pop_can': 'tf.Tensor(shape=(None,), dtype=float32)', 'artist_pop_seed_track': 'tf.Tensor(shape=(None,), dtype=float32)', 'collaborative': 'tf.Tensor(shape=(None,), dtype=string)', 'description_pl': 'tf.Tensor(shape=(None,), dtype=string)', 'duration_ms_can': 'tf.Tensor(shape=(None,), dtype=float32)', 'duration_seed_track': 'tf.Tensor(shape=(None,), dtype=float32)', 'n_songs_pl': 'tf.Tensor(shape=(None,), dtype=float32)', 'name': 'tf.Tensor(shape=(None,), dtype=string)', 'num_albums_pl': 'tf.Tensor(shape=(None,), dtype=float32)', 'num_artists_pl': 'tf.Tensor(shape=(None,), dtype=float32)', 'track_name_can': 'tf.Tensor(shape=(None,), dtype=string)', 'track_name_seed_track': 'tf.Tensor(shape=(None,), dtype=string)', 'track_pop_can': 'tf.Tensor(shape=(None,), dtype=float32)', 'track_pop_seed_track': 'tf.Tensor(shape=(None,), dtype=float32)', 'track_uri_can': 'tf.Tensor(shape=(None,), dtype=string)'}, {'album_name_pl': 'tf.RaggedTensor(values=Tensor("cond/Identity_23:0", shape=(None,), dtype=string), row_splits=Tensor("cond/Identity_24:0", shape=(None,), dtype=int32))', 'artist_genres_pl': 'tf.RaggedTensor(values=Tensor("cond/Identity_25:0", shape=(None,), dtype=string), row_splits=Tensor("cond/Identity_26:0", shape=(None,), dtype=int32))', 'artist_name_pl': 'tf.RaggedTensor(values=Tensor("cond/Identity_27:0", shape=(None,), dtype=string), row_splits=Tensor("cond/Identity_28:0", shape=(None,), dtype=int32))', 'artist_pop_pl': 'tf.RaggedTensor(values=Tensor("cond/Identity_29:0", shape=(None,), dtype=float32), row_splits=Tensor("cond/Identity_30:0", shape=(None,), dtype=int32))', 'artists_followers_pl': 'tf.RaggedTensor(values=Tensor("cond/Identity_31:0", shape=(None,), dtype=float32), row_splits=Tensor("cond/Identity_32:0", shape=(None,), dtype=int32))', 'duration_ms_songs_pl': 'tf.RaggedTensor(values=Tensor("cond/Identity_33:0", shape=(None,), dtype=float32), row_splits=Tensor("cond/Identity_34:0", shape=(None,), dtype=int32))', 'track_name_pl': 'tf.RaggedTensor(values=Tensor("cond/Identity_35:0", shape=(None,), dtype=string), row_splits=Tensor("cond/Identity_36:0", shape=(None,), dtype=int32))', 'track_pop_pl': 'tf.RaggedTensor(values=Tensor("cond/Identity_37:0", shape=(None,), dtype=float32), row_splits=Tensor("cond/Identity_38:0", shape=(None,), dtype=int32))'})
