# Multi-Tower Encoders with TFRS

In [1]:
# set variables
SEED = 41781897
PROJECT_ID = 'hybrid-vertex'
BQ_LOCATION='us-central1'

### Pip

> check package versions

In [2]:
!pip freeze | grep tensorflow

tensorflow==2.8.2
tensorflow-cloud==0.1.16
tensorflow-datasets==4.4.0
tensorflow-estimator==2.8.0
tensorflow-hub==0.12.0
tensorflow-io==0.23.0
tensorflow-io-gcs-filesystem==0.23.0
tensorflow-metadata==1.8.0
tensorflow-probability==0.14.1
tensorflow-recommenders==0.6.0
tensorflow-serving-api==2.8.0
tensorflow-transform==1.8.0


#### JT package versions (7/7)
```
tensorflow==2.9.1
tensorflow-cloud==0.1.16
tensorflow-data-validation==0.26.1
tensorflow-datasets==4.4.0
tensorflow-estimator==2.9.0
tensorflow-hub==0.12.0
tensorflow-io==0.26.0
tensorflow-io-gcs-filesystem==0.26.0
tensorflow-metadata==1.8.0
tensorflow-model-analysis==0.26.1
tensorflow-probability==0.14.1
tensorflow-recommenders==0.6.0
tensorflow-serving-api==2.9.0
tensorflow-transform==1.9.0
```
produces error:
> `UNIMPLEMENTED: Cannot merge options for dataset of type IO>BigQueryDataset, because the dataset does not implement `InputDatasets`.`

#### JT package versions (7/6)

```
tensorflow==2.9.1
tensorflow-cloud==0.1.13
tensorflow-data-validation==0.26.1
tensorflow-datasets==3.0.0
tensorflow-estimator==2.9.0
tensorflow-hub==0.9.0
tensorflow-io==0.26.0
tensorflow-io-gcs-filesystem==0.26.0
tensorflow-metadata==0.26.0
tensorflow-model-analysis==0.26.1
tensorflow-probability==0.11.0
tensorflow-recommenders==0.6.0
tensorflow-serving-api==2.3.0
tensorflow-transform==0.26.0
```

#### JW package versions 
```
tensorflow==2.3.1
tensorflow-cloud==0.1.16
tensorflow-datasets==4.4.0
tensorflow-estimator==2.3.0
tensorflow-hub==0.12.0
tensorflow-io==0.15.0
tensorflow-io-gcs-filesystem==0.26.0
tensorflow-metadata==1.8.0
tensorflow-probability==0.14.1
tensorflow-recommenders==0.6.0
tensorflow-serving-api==2.8.0
tensorflow-transform==1.8.0
```

In [3]:
# !pip install tensorflow-recommenders==0.6.0 --user
# !pip install -U tensorflow-io==0.26.0 --user
# pip install tensorflow-cloud==0.1.16
# pip install tensorflow-datasets==4.4.0
# pip install tensorflow-hub==0.12.0
# pip install tensorflow-metadata==1.8.0
# pip install tensorflow-probability==0.14.1
# pip install tensorflow-serving-api==2.9.0
# pip install tensorflow-transform==1.9.1

# pip uninstall tensorflow
# pip uninstall tensorflow-cloud
# pip uninstall tensorflow-data-validation
# pip uninstall tensorflow-datasets
# pip uninstall tensorflow-estimator
# pip uninstall tensorflow-hub
# pip uninstall tensorflow-io
# pip uninstall tensorflow-io-gcs-filesystem
# pip uninstall tensorflow-metadata
# pip uninstall tensorflow-model-analysis
# pip uninstall tensorflow-probability
# pip uninstall tensorflow-recommenders
# pip uninstall tensorflow-serving-api
# pip uninstall tensorflow-transform

### Import packages

> With TF 2.3, Autotune was experimental and was in nightly build but in TF 2.5, it is in packages

In [4]:
import warnings
warnings.filterwarnings("ignore") #do this b/c there's an info-level bug that can safely be ignored

from tensorflow.python.framework import ops
from tensorflow.python.framework import dtypes
from tensorflow_io.bigquery import BigQueryClient
from tensorflow_io.bigquery import BigQueryReadSession

import json
import tensorflow as tf
import tensorflow_recommenders as tfrs
import datetime
from tensorflow.python.lib.io import file_io
from tensorflow.train import BytesList, Feature, FeatureList, Int64List, FloatList
from tensorflow.train import SequenceExample, FeatureLists

import os

import numpy as np

from pprint import pprint

Using TensorFlow backend.
Using TensorFlow backend.


ImportError: cannot import name 'transpose_shape' from 'keras.utils.generic_utils' (/opt/conda/lib/python3.7/site-packages/keras/utils/generic_utils.py)

In [4]:
import tensorflow as tf
print(tf. __version__)

2.5.1


## Prep Train Data

* Use tensorflow-io to import from BigQuery
* Creates a [TF Dataset](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#methods_2) object 
* Vocab files for `TextVectorization` and `StringLookup` layers pre-computed and saved to GCS


See [end-to-end guide BQ -> TF Dataset](https://www.tensorflow.org/io/tutorials/bigquery#load_census_data_in_tensorflow_dataset_using_bigquery_reader) for tips

In [6]:
# ! rm -rf data_prep
# ! mkdir data_prep

In [7]:
bq_2_tf_dict = {
    'name': {'mode': BigQueryClient.FieldMode.NULLABLE, 'output_type': dtypes.string},
    'collaborative': {'mode': BigQueryClient.FieldMode.NULLABLE, 'output_type': dtypes.string},
    # 'pid': {'mode': BigQueryClient.FieldMode.NULLABLE, 'output_type': dtypes.int64},
    # 'duration_ms_playlist': {'mode': BigQueryClient.FieldMode.NULLABLE, 'output_type': dtypes.int64},
    # 'pid_pos_id': {'mode':BigQueryClient.FieldMode.NULLABLE, 'output_type': dtypes.string},
    # candidate features
    'pos_can': {'mode':BigQueryClient.FieldMode.NULLABLE, 'output_type': dtypes.int64},
    'artist_name_can': {'mode':BigQueryClient.FieldMode.NULLABLE, 'output_type': dtypes.string},
    'track_uri_can': {'mode':BigQueryClient.FieldMode.NULLABLE, 'output_type': dtypes.string},
    'artist_uri_can': {'mode':BigQueryClient.FieldMode.NULLABLE, 'output_type': dtypes.string},
    'track_name_can': {'mode':BigQueryClient.FieldMode.NULLABLE, 'output_type': dtypes.string},
    'album_uri_can': {'mode':BigQueryClient.FieldMode.NULLABLE, 'output_type': dtypes.string},
    'duration_ms_can': {'mode':BigQueryClient.FieldMode.NULLABLE, 'output_type': dtypes.float64},
    'album_name_can': {'mode':BigQueryClient.FieldMode.NULLABLE, 'output_type': dtypes.string},
    'track_pop_can': {'mode':BigQueryClient.FieldMode.NULLABLE, 'output_type': dtypes.float64},
    'artist_pop_can': {'mode':BigQueryClient.FieldMode.NULLABLE, 'output_type': dtypes.float64},
    'artist_genres_can': {'mode': BigQueryClient.FieldMode.NULLABLE, 'output_type': dtypes.string},
    'artist_followers_can': {'mode':BigQueryClient.FieldMode.NULLABLE, 'output_type': dtypes.float64},
    # seed track features
    'pos_seed_track': {'mode':BigQueryClient.FieldMode.NULLABLE, 'output_type': dtypes.int64},
    'artist_name_seed_track': {'mode':BigQueryClient.FieldMode.NULLABLE, 'output_type': dtypes.string},
    'artist_uri_seed_track': {'mode':BigQueryClient.FieldMode.NULLABLE, 'output_type': dtypes.string},
    'track_name_seed_track': {'mode': BigQueryClient.FieldMode.NULLABLE, 'output_type': dtypes.string},
    'track_uri_seed_track': {'mode': BigQueryClient.FieldMode.NULLABLE, 'output_type': dtypes.string},
    'album_name_seed_track': {'mode': BigQueryClient.FieldMode.NULLABLE, 'output_type': dtypes.string},
    'album_uri_seed_track': {'mode': BigQueryClient.FieldMode.NULLABLE, 'output_type': dtypes.string},
    'duration_seed_track': {'mode': BigQueryClient.FieldMode.NULLABLE, 'output_type': dtypes.float64},
    'track_pop_seed_track': {'mode':BigQueryClient.FieldMode.NULLABLE, 'output_type': dtypes.float64},
    'artist_pop_seed_track': {'mode':BigQueryClient.FieldMode.NULLABLE, 'output_type': dtypes.float64},
    'artist_genres_seed_track': {'mode': BigQueryClient.FieldMode.NULLABLE, 'output_type': dtypes.string},
    'artist_followers_seed_track': {'mode':BigQueryClient.FieldMode.NULLABLE, 'output_type': dtypes.float64},
    ### playlist features
    'duration_ms_seed_pl': {'mode': BigQueryClient.FieldMode.NULLABLE, 'output_type': dtypes.float64},
    'n_songs_pl': {'mode': BigQueryClient.FieldMode.NULLABLE, 'output_type': dtypes.float64},
    'num_artists_pl': {'mode': BigQueryClient.FieldMode.NULLABLE, 'output_type': dtypes.float64},
    'num_albums_pl': {'mode': BigQueryClient.FieldMode.NULLABLE, 'output_type': dtypes.float64},
    'description_pl': {'mode': BigQueryClient.FieldMode.NULLABLE, 'output_type': dtypes.string},
    # 'pos_pl': {'mode': BigQueryClient.FieldMode.REPEATED, 'output_type': dtypes.int64},
    'artist_name_pl': {'mode': BigQueryClient.FieldMode.REPEATED, 'output_type': dtypes.string},
    'track_uri_pl': {'mode': BigQueryClient.FieldMode.REPEATED, 'output_type': dtypes.string},
    'track_name_pl': {'mode': BigQueryClient.FieldMode.REPEATED, 'output_type': dtypes.string},
    'duration_ms_songs_pl': {'mode': BigQueryClient.FieldMode.REPEATED, 'output_type': dtypes.float64},
    'album_name_pl': {'mode': BigQueryClient.FieldMode.REPEATED, 'output_type': dtypes.string},
    'artist_pop_pl': {'mode': BigQueryClient.FieldMode.REPEATED, 'output_type': dtypes.float64},
    'artists_followers_pl': {'mode': BigQueryClient.FieldMode.REPEATED, 'output_type': dtypes.float64},              
    'track_pop_pl': {'mode': BigQueryClient.FieldMode.REPEATED, 'output_type': dtypes.int64},
    'artist_genres_pl': {'mode': BigQueryClient.FieldMode.REPEATED, 'output_type': dtypes.string},
}

In [8]:
BQ_TABLE_TRAIN = 'train_flatten'
BQ_DATASET_TRAIN = 'spotify_train_3'
io_batch_size = 1

bq_client = BigQueryClient()

bqsession = bq_client.read_session(
    "projects/" + PROJECT_ID,
    PROJECT_ID, 
    f'{BQ_TABLE_TRAIN}', 
    f'{BQ_DATASET_TRAIN}',
    bq_2_tf_dict,
    requested_streams=2,
)

dataset = bqsession.parallel_read_rows()
dataset = dataset.prefetch(1).shuffle(io_batch_size*10).batch(io_batch_size)

2022-07-07 04:55:39.814109: W tensorflow_io/core/kernels/audio_video_mp3_kernels.cc:271] libmp3lame.so.0 or lame functions are not available
2022-07-07 04:55:39.814450: I tensorflow_io/core/kernels/cpu_check.cc:128] Your CPU supports instructions that this TensorFlow IO binary was not compiled to use: AVX2 FMA
2022-07-07 04:55:39.962800: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda/lib64:/usr/local/nccl2/lib:/usr/local/cuda/extras/CUPTI/lib64
2022-07-07 04:55:39.962858: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2022-07-07 04:55:39.962885: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (spotify-tf-2-3-jt): /proc/driver/nvidia/version does not exist
2022-07-07 04:55:39.963314: I tensorflo

In [None]:
# pprint(dataset.take(1))
for x in dataset.take(1):
    print(x)

2022-07-07 04:55:45.550130: E tensorflow/core/framework/dataset.cc:580] UNIMPLEMENTED: Cannot compute input sources for dataset of type IO>BigQueryDataset, because the dataset does not implement `InputDatasets`.
2022-07-07 04:55:45.550196: E tensorflow/core/framework/dataset.cc:584] UNIMPLEMENTED: Cannot merge options for dataset of type IO>BigQueryDataset, because the dataset does not implement `InputDatasets`.
2022-07-07 04:55:45.550754: E tensorflow/core/framework/dataset.cc:580] UNIMPLEMENTED: Cannot compute input sources for dataset of type IO>BigQueryDataset, because the dataset does not implement `InputDatasets`.
2022-07-07 04:55:45.550802: E tensorflow/core/framework/dataset.cc:584] UNIMPLEMENTED: Cannot merge options for dataset of type IO>BigQueryDataset, because the dataset does not implement `InputDatasets`.


### Test

* ` 'repeated', 'nullable' or 'required'`

In [525]:
bq_3_tf_dict = {
    'name': {"mode": "nullable", "output_type": dtypes.string},
    'collaborative': {"mode": "nullable", "output_type": dtypes.string},
    'pid': {"mode": "nullable", "output_type": dtypes.int64},
    # 'duration_ms_playlist': {'mode': BigQueryClient.FieldMode.NULLABLE, 'output_type': dtypes.int64},
    # 'pid_pos_id': {'mode':BigQueryClient.FieldMode.NULLABLE, 'output_type': dtypes.string},
    # candidate features
    'pos_can': {"mode":"nullable", "output_type": dtypes.int64},
    'artist_name_can': {"mode":"nullable", "output_type": dtypes.string},
    'track_uri_can': {"mode":"nullable", "output_type": dtypes.string},
    'artist_uri_can': {"mode":"nullable", "output_type": dtypes.string},
    'track_name_can': {"mode":"nullable", "output_type": dtypes.string},
    'album_uri_can': {"mode":"nullable", "output_type": dtypes.string},
    'duration_ms_can': {"mode":"nullable", "output_type": dtypes.float64},
    'album_name_can': {"mode":"nullable", "output_type": dtypes.string},
    'track_pop_can': {"mode":"nullable", "output_type": dtypes.float64},
    'artist_pop_can': {"mode":"nullable", "output_type": dtypes.float64},
    'artist_genres_can': {"mode": "nullable", "output_type": dtypes.string},
    'artist_followers_can': {"mode":"nullable", "output_type": dtypes.float64},
    # seed track features
    'pos_seed_track': {"mode":"nullable", "output_type": dtypes.int64},
    'artist_name_seed_track': {"mode":"nullable", "output_type": dtypes.string},
    'artist_uri_seed_track': {"mode":"nullable", "output_type": dtypes.string},
    'track_name_seed_track': {"mode": "nullable", "output_type": dtypes.string},
    'track_uri_seed_track': {"mode": "nullable", "output_type": dtypes.string},
    'album_name_seed_track': {"mode": "nullable", "output_type": dtypes.string},
    'album_uri_seed_track': {"mode": "nullable", "output_type": dtypes.string},
    'duration_seed_track': {"mode": "nullable", "output_type": dtypes.float64},
    'track_pop_seed_track': {"mode":"nullable", "output_type": dtypes.float64},
    'artist_pop_seed_track': {"mode":"nullable", "output_type": dtypes.float64},
    'artist_genres_seed_track': {"mode": "nullable", "output_type": dtypes.string},
    'artist_followers_seed_track': {"mode":"nullable", "output_type": dtypes.float64},
    ### playlist features
    'duration_ms_seed_pl': {"mode": "nullable", "output_type": dtypes.float64},
    'n_songs_pl': {"mode": "nullable", "output_type": dtypes.float64},
    'num_artists_pl': {"mode": "nullable", "output_type": dtypes.float64},
    'num_albums_pl': {"mode": "nullable", "output_type": dtypes.float64},
    'description_pl': {"mode": "nullable", "output_type": dtypes.string},
    # 'pos_pl': {'mode': BigQueryClient.FieldMode.REPEATED, 'output_type': dtypes.int64},
    'artist_name_pl': {"mode": "repeated", "output_type": dtypes.string},
    'track_uri_pl': {"mode": "repeated", "output_type": dtypes.string},
    'track_name_pl': {"mode": "repeated", "output_type": dtypes.string},
    'duration_ms_songs_pl': {"mode": "repeated", "output_type": dtypes.float64},
    'album_name_pl': {"mode": "repeated", "output_type": dtypes.string},
    'artist_pop_pl': {"mode": "repeated", "output_type": dtypes.float64},
    'artists_followers_pl': {"mode": "repeated", "output_type": dtypes.float64},              
    'track_pop_pl': {"mode": "repeated", "output_type": dtypes.int64},
    'artist_genres_pl': {"mode": "repeated", "output_type": dtypes.string},
}

In [528]:
BQ_TABLE_TRAIN = 'train_flatten'
BQ_DATASET_TRAIN = 'spotify_train_3'
io_batch_size = 1

bq_client = BigQueryClient()

bqsession_2 = bq_client.read_session(
    parent = "projects/" + PROJECT_ID,
    project_id=PROJECT_ID, 
    table_id = f'{BQ_TABLE_TRAIN}', 
    dataset_id = f'{BQ_DATASET_TRAIN}',
    selected_fields = bq_2_tf_dict,
    requested_streams = 2,
)

dataset_2 = bqsession_2.parallel_read_rows()
dataset_2 = dataset_2.prefetch(1).shuffle(io_batch_size*10).batch(io_batch_size)

In [1]:
for x in dataset_2.take(1):
    print(x)

NameError: name 'dataset_2' is not defined

# Multi-Tower Model

Create a DNN nominator based on a 3-tower architecture, with `playlist`, `seed_track`, and `candidate_track` towers (TODO: insert diagram)

* In this model, we have **two** kinds of query embeddings: `playlist` and `seed_track`
* Within `playlist`, we include the sequence of tracks in each playlist
* We propose an attention-based playlist model to summarize the track sequence

In [7]:
import pickle as pkl
from google.cloud import storage

BUCKET_NAME = 'spotify-v1'
FILE_PATH = 'vocabs/v1_string_vocabs'
FILE_NAME = 'string_vocabs_v1_20220705-202905.txt'
DESTINATION_FILE = 'downloaded_vocabs.txt'

client = storage.Client()

with open(f'{DESTINATION_FILE}', 'wb') as file_obj:
    client.download_blob_to_file(
        f'gs://{BUCKET_NAME}/{FILE_PATH}/{FILE_NAME}', file_obj)

    
with open(f'{DESTINATION_FILE}', 'rb') as pickle_file:
    vocab_dict_load = pkl.load(pickle_file)


In [246]:
# len(vocab_dict_load["unique_pids"])

avg_duration_ms_seed_pl = 13000151.68
var_duration_ms_seed_pl = 133092900971233.58
vocab_dict_load['avg_duration_ms_seed_pl']=avg_duration_ms_seed_pl
vocab_dict_load['var_duration_ms_seed_pl']=var_duration_ms_seed_pl

avg_n_songs_pl = 55.21
var_n_songs_pl = 2317.54
vocab_dict_load['avg_n_songs_pl']=avg_n_songs_pl
vocab_dict_load['var_n_songs_pl']=var_n_songs_pl

avg_n_artists_pl = 30.56
var_n_artists_pl = 769.26
vocab_dict_load['avg_n_artists_pl']=avg_n_artists_pl
vocab_dict_load['var_n_artists_pl']=var_n_artists_pl

avg_n_albums_pl = 40.25
var_n_albums_pl = 1305.54
vocab_dict_load['avg_n_albums_pl']=avg_n_albums_pl
vocab_dict_load['var_n_albums_pl']=var_n_albums_pl

avg_artist_pop = 16.08
var_artist_pop = 300.64
vocab_dict_load['avg_artist_pop']=avg_artist_pop
vocab_dict_load['var_artist_pop']=var_artist_pop

avg_duration_ms_songs_pl = 234823.14
var_duration_ms_songs_pl = 5558806228.41
vocab_dict_load['avg_duration_ms_songs_pl']=avg_duration_ms_songs_pl
vocab_dict_load['var_duration_ms_songs_pl']=var_duration_ms_songs_pl

avg_artist_followers = 43337.77
var_artist_followers = 377777790193.57
vocab_dict_load['avg_artist_followers']=avg_artist_followers
vocab_dict_load['var_artist_followers']=var_artist_followers

avg_track_pop = 10.85
var_track_pop = 202.18
vocab_dict_load['avg_track_pop']=avg_track_pop
vocab_dict_load['var_track_pop']=var_track_pop
# vocab_dict_load['unique_pids_string']

In [123]:
vocab_dict_load['unique_pids']

array([     0,      1,      2, ..., 999997, 999998, 999999])

In [516]:
for x in dataset.take(1).as_numpy_iterator():
    print(x.shape)
  # pprint(x['track_pop_pl'])


2022-07-07 03:12:27.135176: E tensorflow/core/framework/dataset.cc:580] UNIMPLEMENTED: Cannot compute input sources for dataset of type IO>BigQueryDataset, because the dataset does not implement `InputDatasets`.
2022-07-07 03:12:27.135233: E tensorflow/core/framework/dataset.cc:584] UNIMPLEMENTED: Cannot merge options for dataset of type IO>BigQueryDataset, because the dataset does not implement `InputDatasets`.
2022-07-07 03:12:27.135958: E tensorflow/core/framework/dataset.cc:580] UNIMPLEMENTED: Cannot compute input sources for dataset of type IO>BigQueryDataset, because the dataset does not implement `InputDatasets`.
2022-07-07 03:12:27.136003: E tensorflow/core/framework/dataset.cc:584] UNIMPLEMENTED: Cannot merge options for dataset of type IO>BigQueryDataset, because the dataset does not implement `InputDatasets`.


In [347]:
test_instance = {
    'name': np.asarray([b'Best Christmas']),
    'collaborative': np.asarray([b'false']),
    'pid': np.asarray([173671]),
    'description_pl': np.asarray([b'test description']),
    'duration_ms_seed_pl': np.asarray([5458995.]),
    'n_songs_pl': np.asarray([58.]),
    'num_artists_pl': np.asarray([19.]),
    'num_albums_pl': np.asarray([27.]),
    'artist_name_pl': np.asarray([[b'Juan Luis Guerra 4.40', b'Prince Royce', b'Luis Vargas']]),
    'track_uri_pl': np.asarray([[b'spotify:track:1g0IBPZTRP7VYkctJ4Qafg',b'spotify:track:43wUzbYxEFoXugYkgTzMWp']]),
    'track_name_pl': np.asarray([[b'Lover Come Back', b'White Lightning', b'Shake Me Down']]),
    'duration_ms_songs_pl': np.asarray([[245888., 195709., 283906., 271475., 300373., 275173., 236145.,]]),
    'album_name_pl': np.asarray([[b'Silsulim', b'Sara Shara', b'Muzika Vesheket', b'Ba La Lirkod']]),
    'artist_pop_pl': np.asarray([[81., 81., 70., 66., 66., 66., 46., 87.]]),
    'artists_followers_pl': np.asarray([[3.556710e+05, 8.200000e+02, 1.510000e+02, 1.098080e+05,]]),
    'artist_genres_pl': np.asarray([[b"'israeli pop', 'jewish pop'", b"'israeli pop', 'jewish pop'",]]),
    'track_pop_pl': np.asarray([[70, 77, 50, 44, 30, 28, 15, 26, 15, 18, 46, 38,]])
}

In [514]:
# pprint(test_instance)
dataset.shape

AttributeError: 'BatchDataset' object has no attribute 'shape'

## Playlist Tower

TODO 
* For playlist sequence features: 
> * vectorize text features as sequence (`TextVectorization`)? or `StringLookup`? If vectorize, last dimension in shape needs to be 1
> * Use both `track_uri` and `track_name`? or just one?
> * Should numerical features (followers, popularity) just be an aggregated feature at the playlist-level? Or keep as sequence feature? 

In [366]:
EMBEDDING_DIM = 32
PROJECTION_DIM = 5
SEED = 1234
USE_CROSS_LAYER=True
DROPOUT='False'
DROPOUT_RATE='0.33'

class Playlist_Model(tf.keras.Model):
    def __init__(self, layer_sizes, vocab_dict):
        super().__init__()

        # ========================================
        # non-sequence playlist features
        # ========================================
        
        # Feature: playlist name
        self.pl_name_text_embedding = tf.keras.Sequential(
            [
                tf.keras.layers.TextVectorization(
                    # max_tokens=len(vocab_dict["name"]), # not needed if passing vocab
                    vocabulary=vocab_dict['name'], 
                    name="pl_name_txt_vectorizer", 
                    ngrams=2
                ),
                tf.keras.layers.Embedding(
                    input_dim=len(vocab_dict["name"]),
                    output_dim=EMBEDDING_DIM,
                    mask_zero=False,
                    name="pl_name_emb_layer",
                ),
                tf.keras.layers.GlobalAveragePooling1D(name="pl_name_pooling"),
            ], name="pl_name_emb_model"
        )
        
        # Feature: collaborative
        collaborative_vocab = np.array([b'false', b'true'])
        
        self.pl_collaborative_embedding = tf.keras.Sequential(
            [
                tf.keras.layers.StringLookup(
                    vocabulary=collaborative_vocab, 
                    mask_token=None, 
                    name="pl_collaborative_lookup", 
                    output_mode='int'
                ),
                tf.keras.layers.Embedding(
                    input_dim=len(collaborative_vocab),
                    output_dim=EMBEDDING_DIM,
                    mask_zero=False,
                    name="pl_collaborative_emb_layer",
                ),
            ], name="pl_collaborative_emb_model"
        )
        
        # Feature: pid
        self.pl_pid_embedding = tf.keras.Sequential(
            [
                tf.keras.layers.IntegerLookup(
                    vocabulary=vocab_dict['unique_pids'], 
                    mask_token=None, 
                    name="pl_pid_lookup", 
                    # output_mode='int'
                ),
                tf.keras.layers.Embedding(
                    input_dim=len(vocab_dict['unique_pids']),
                    output_dim=EMBEDDING_DIM,
                    mask_zero=False,
                    name="pl_pid_emb_layer",
                ),
            ], name="pl_pid_emb_model"
        )
        
        # Feature: description_pl
        self.pl_description_text_embedding = tf.keras.Sequential(
            [
                tf.keras.layers.TextVectorization(
                    # max_tokens=len(vocab_dict["description_pl"]), # not needed if passing vocab
                    vocabulary=vocab_dict['description_pl'], 
                    name="description_pl_vectorizer", 
                    ngrams=2,
                ),
                tf.keras.layers.Embedding(
                    input_dim=len(vocab_dict["description_pl"]),
                    output_dim=EMBEDDING_DIM,
                    mask_zero=False,
                    name="description_pl_emb_layer",
                ),
                tf.keras.layers.GlobalAveragePooling1D(name="description_pl_pooling"),
            ], name="pl_description_emb_model"
        )
        
        # Feature: duration_ms_seed_pl                      
        # TODO: Noramlize or Descritize?
        duration_ms_seed_pl_buckets = np.linspace(
            vocab_dict['min_duration_ms_seed_pl'], 
            vocab_dict['max_duration_ms_seed_pl'], 
            num=1000
        )
        self.duration_ms_seed_pl_embedding = tf.keras.Sequential(
            [
                tf.keras.layers.Discretization(duration_ms_seed_pl_buckets.tolist()),
                tf.keras.layers.Embedding(
                    input_dim=len(duration_ms_seed_pl_buckets) + 1, 
                    output_dim=EMBEDDING_DIM, 
                    name="duration_ms_seed_pl_emb_layer",
                )
            ], name="duration_ms_seed_pl_emb_model"
        )
        # self.duration_ms_seed_pl_normalization = tf.keras.layers.Normalization(
        #     mean=vocab_dict['avg_duration_ms_seed_pl'],
        #     variance=vocab_dict['var_duration_ms_seed_pl'],
        #     axis=None
        # )
        
        # Feature: n_songs_pl
        # TODO: Noramlize or Descritize?
        n_songs_pl_buckets = np.linspace(
            vocab_dict['min_n_songs_pl'], 
            vocab_dict['max_n_songs_pl'], 
            num=100
        )
        self.n_songs_pl_embedding = tf.keras.Sequential(
            [
                tf.keras.layers.Discretization(n_songs_pl_buckets.tolist()),
                tf.keras.layers.Embedding(
                    input_dim=len(n_songs_pl_buckets) + 1, 
                    output_dim=EMBEDDING_DIM, 
                    name="n_songs_pl_emb_layer",
                )
            ], name="n_songs_pl_emb_model"
        )
        # self.n_songs_pl_normalization = tf.keras.layers.Normalization(
        #     mean=vocab_dict['avg_n_songs_pl'],
        #     variance=vocab_dict['var_n_songs_pl'],
        #     axis=None
        # )
        
        # Feature: num_artists_pl
        # TODO: Noramlize or Descritize?
        n_artists_pl_buckets = np.linspace(
            vocab_dict['min_n_artists_pl'], 
            vocab_dict['max_n_artists_pl'], 
            num=100
        )
        self.n_artists_pl_embedding = tf.keras.Sequential(
            [
                tf.keras.layers.Discretization(n_artists_pl_buckets.tolist()),
                tf.keras.layers.Embedding(
                    input_dim=len(n_artists_pl_buckets) + 1, 
                    output_dim=EMBEDDING_DIM, 
                    name="n_artists_pl_emb_layer",
                )
            ], name="n_artists_pl_emb_model"
        )
        # self.n_artists_pl_normalization = tf.keras.layers.Normalization(
        #     mean=vocab_dict['avg_n_artists_pl'],
        #     variance=vocab_dict['var_n_artists_pl'],
        #     axis=None
        # )
        
        # Feature: num_albums_pl
        n_albums_pl_buckets = np.linspace(
            vocab_dict['min_n_albums_pl'], 
            vocab_dict['max_n_albums_pl'],
            num=100
        )
        self.n_albums_pl_embedding = tf.keras.Sequential(
            [
                tf.keras.layers.Discretization(n_albums_pl_buckets.tolist()),
                tf.keras.layers.Embedding(
                    input_dim=len(n_albums_pl_buckets) + 1, 
                    output_dim=EMBEDDING_DIM, 
                    name="n_albums_pl_emb_layer",
                )
            ], name="n_albums_pl_emb_model"
        )
        # self.n_albums_pl_normalization = tf.keras.layers.Normalization(
        #     mean=vocab_dict['avg_n_albums_pl'],
        #     variance=vocab_dict['var_n_albums_pl'],
        #     axis=None
        # )
        
        # ========================================
        # sequence playlist features
        # ========================================
        
        # Feature: artist_name_pl
        self.artist_name_pl_embedding = tf.keras.Sequential(
            [
                tf.keras.layers.StringLookup(
                    vocabulary=vocab_dict['artist_name_pl'], mask_token=''),
                tf.keras.layers.Embedding(
                    input_dim=len(vocab_dict['artist_name_pl']) + 1, 
                    output_dim=EMBEDDING_DIM,
                    name="artist_name_pl_emb_layer",
                ),
                tf.keras.layers.GRU(EMBEDDING_DIM),
            ], name="artist_name_pl_emb_model"
        )
        
        # Feature: track_uri_pl
        # 2.2M unique
        self.track_uri_pl_embedding = tf.keras.Sequential(
            [
                tf.keras.layers.Hashing(num_bins=200_000),
                tf.keras.layers.Embedding(
                    input_dim=len(vocab_dict['track_uri_pl']) + 1, 
                    output_dim=EMBEDDING_DIM,
                    name="track_uri_pl_emb_layer",
                ),
                tf.keras.layers.GRU(EMBEDDING_DIM),
            ], name="track_uri_pl_emb_model"
        )
        
        # Feature: track_name_pl
        self.track_name_pl_embedding = tf.keras.Sequential(
            [
                tf.keras.layers.StringLookup(
                    vocabulary=vocab_dict['track_name_pl'], 
                    name="track_name_pl_lookup",
                    output_mode='int',
                ),
                tf.keras.layers.Embedding(
                    input_dim=len(vocab_dict['track_name_pl']), 
                    output_dim=EMBEDDING_DIM,
                    name="track_name_pl_emb_layer",
                ),
                tf.keras.layers.GRU(EMBEDDING_DIM),
            ], name="track_name_pl_emb_model"
        )
        
        # Feature: duration_ms_songs_pl
        duration_ms_songs_pl_buckets = np.linspace(
            vocab_dict['min_duration_ms_songs_pl'], 
            vocab_dict['max_duration_ms_songs_pl'], 
            num=100
        )
        self.duration_ms_songs_pl_embedding = tf.keras.Sequential(
            [
                tf.keras.layers.Discretization(duration_ms_songs_pl_buckets.tolist()),
                tf.keras.layers.Embedding(
                    input_dim=len(duration_ms_songs_pl_buckets) + 1, 
                    output_dim=EMBEDDING_DIM,
                    name="duration_ms_songs_pl_emb_layer",
                ),
                tf.keras.layers.GRU(EMBEDDING_DIM),
            ], name="duration_ms_songs_pl_emb_model"
        )
        
        # Feature: album_name_pl
        self.album_name_pl_embedding = tf.keras.Sequential(
            [
                tf.keras.layers.StringLookup(
                    vocabulary=vocab_dict['album_name_pl'], mask_token=None),
                tf.keras.layers.Embedding(
                    input_dim=len(vocab_dict['album_name_pl']), 
                    output_dim=EMBEDDING_DIM,
                    name="album_name_pl_emb_layer",
                ),
                tf.keras.layers.GRU(EMBEDDING_DIM),
            ], name="album_name_pl_emb_model"
        )

        # Feature: artist_pop_pl
        artist_pop_pl_buckets = np.linspace(
            vocab_dict['min_artist_pop'], 
            vocab_dict['max_artist_pop'], 
            num=10
        )
        self.artist_pop_pl_embedding = tf.keras.Sequential(
            [
                tf.keras.layers.Discretization(artist_pop_pl_buckets.tolist()),
                tf.keras.layers.Embedding(
                    input_dim=len(artist_pop_pl_buckets) + 1, 
                    output_dim=EMBEDDING_DIM,
                    name="artist_pop_pl_emb_layer",
                ),
                tf.keras.layers.GRU(EMBEDDING_DIM),
            ], name="artist_pop_pl_emb_model"
        )
        
        # Feature: artists_followers_pl
        artists_followers_pl_buckets = np.linspace(
            vocab_dict['min_artist_followers'], 
            vocab_dict['max_artist_followers'], 
            num=10
        )
        self.artists_followers_pl_embedding = tf.keras.Sequential(
            [
                tf.keras.layers.Discretization(artists_followers_pl_buckets.tolist()),
                tf.keras.layers.Embedding(
                    input_dim=len(artists_followers_pl_buckets) + 1, 
                    output_dim=EMBEDDING_DIM,
                    name="artists_followers_pl_emb_layer",
                ),
                tf.keras.layers.GRU(EMBEDDING_DIM),
            ], name="artists_followers_pl_emb_model"
        )
        
        # Feature: track_pop_pl
        track_pop_pl_buckets = np.linspace(
            vocab_dict['min_track_pop'], 
            vocab_dict['max_track_pop'], 
            num=10
        )
        self.track_pop_pl_embedding = tf.keras.Sequential(
            [
                tf.keras.layers.Discretization(track_pop_pl_buckets.tolist()),
                tf.keras.layers.Embedding(
                    input_dim=len(track_pop_pl_buckets) + 1, 
                    output_dim=EMBEDDING_DIM,
                    name="track_pop_pl_emb_layer",
                ),
                tf.keras.layers.GRU(EMBEDDING_DIM),
            ], name="track_pop_pl_emb_model"
        )
        
        # Feature: artist_genres_pl
        self.artist_genres_pl_embedding = tf.keras.Sequential(
            [
                tf.keras.layers.StringLookup(
                    vocabulary=vocab_dict['artist_genres_pl'], mask_token=None),
                tf.keras.layers.Embedding(
                    input_dim=len(vocab_dict['artist_genres_pl']), 
                    output_dim=EMBEDDING_DIM,
                    name="artist_genres_pl_emb_layer",
                ),
                tf.keras.layers.GRU(EMBEDDING_DIM),
            ], name="artist_genres_pl_emb_model"
        )

        # ========================================
        # dense and cross layers
        # ========================================

        # Cross Layers
        if USE_CROSS_LAYER:
            self._cross_layer = tfrs.layers.dcn.Cross(
                projection_dim=PROJECTION_DIM,
                kernel_initializer="glorot_uniform", 
                name="pl_cross_layer"
            )
        else:
            self._cross_layer = None
            
        # Dense Layers
        self.dense_layers = tf.keras.Sequential(name="pl_dense_layers")
        initializer = tf.keras.initializers.GlorotUniform(seed=SEED)
        
        # Use the ReLU activation for all but the last layer.
        for layer_size in layer_sizes[:-1]:
            self.dense_layers.add(
                tf.keras.layers.Dense(
                    layer_size, 
                    activation="relu", 
                    kernel_initializer=initializer,
                )
            )
            if DROPOUT:
                self.dense_layers.add(tf.keras.layers.Dropout(DROPOUT_RATE))
                
        # No activation for the last layer
        for layer_size in layer_sizes[-1:]:
            self.dense_layers.add(
                tf.keras.layers.Dense(
                    layer_size, 
                    kernel_initializer=initializer
                )
            )
        ### ADDING L2 NORM AT THE END
        self.dense_layers.add(
            tf.keras.layers.Lambda(
                lambda x: tf.nn.l2_normalize(
                    x, 1, epsilon=1e-12, name="normalize_dense"
                )
            )
        )
    # ========================================
    # call
    # ========================================
    def call(self, data):
        '''
        The call method defines what happens when
        the model is called
        '''
        
        all_embs = tf.concat(
            [
                self.pl_name_text_embedding(data['name']),
                self.pl_collaborative_embedding(data['collaborative']),
                self.pl_pid_embedding(data["pid"]),
                self.pl_description_text_embedding(data['description_pl']),
                self.duration_ms_seed_pl_embedding(data["duration_ms_seed_pl"]),
                # tf.reshape(self.duration_ms_seed_pl_normalization(data["duration_ms_seed_pl"]), (-1, 1))      # Normalize or Discretize?
                self.n_songs_pl_embedding(data["n_songs_pl"]),
                # tf.reshape(self.n_songs_pl_normalization(data["n_songs_pl"]), (-1, 1))                        # Normalize or Discretize?
                self.n_artists_pl_embedding(data['num_artists_pl']),
                # tf.reshape(self.n_artists_pl_normalization(data["num_artists_pl"]), (-1, 1))                  # Normalize or Discretize?
                self.n_albums_pl_embedding(data["num_albums_pl"]),
                # tf.reshape(self.n_albums_pl_normalization(data["num_albums_pl"]), (-1, 1))                    # Normalize or Discretize?
                
                # sequence features
                # data["pos_pl"],
                self.artist_name_pl_embedding(data["artist_name_pl"]),
                self.track_uri_pl_embedding(data["track_uri_pl"]),
                self.track_name_pl_embedding(data["track_name_pl"]),
                self.duration_ms_songs_pl_embedding(data["duration_ms_songs_pl"]),
                self.album_name_pl_embedding(data["album_name_pl"]),
                self.artist_pop_pl_embedding(data["artist_pop_pl"]),
                self.artists_followers_pl_embedding(data["artists_followers_pl"]),
                self.track_pop_pl_embedding(data["track_pop_pl"]),
                self.artist_genres_pl_embedding(data["artist_genres_pl"]),
            ], axis=1)
        
        # Build Cross Network
        if self._cross_layer is not None:
            cross_embs = self._cross_layer(all_embs)
            return self.dense_layers(cross_embs)
        else:
            return self.dense_layers(all_embs)
    
    # For visualizing model graph only # todo: remove
    # def build_graph(self,):
    #     x = tf.keras.layers.Input(shape=(32,32))
    #     return tf.keras.Model(inputs=[x], outputs=self.call(x))

In [507]:
layer_sizes=[64,32]

test_playlist_model = Playlist_Model(layer_sizes,vocab_dict_load)

pl_result = test_playlist_model(test_instance)

print(f"Shape of pl_result: {pl_result.shape}")
pl_result

Shape of pl_result: (1, 32)


<tf.Tensor: shape=(1, 32), dtype=float32, numpy=
array([[ 0.05321983, -0.04084665,  0.11637767,  0.3411542 ,  0.09852977,
        -0.06006141,  0.14619492,  0.24855876, -0.12215702, -0.15610446,
        -0.08795111, -0.27013752, -0.04377728, -0.12331408,  0.09523007,
        -0.13910483,  0.09044929, -0.19123396,  0.14647555, -0.3671819 ,
         0.19767866,  0.03310812,  0.12395252,  0.14589746,  0.18691422,
        -0.23982897,  0.08464528, -0.00168197,  0.38199148,  0.10042445,
        -0.2411324 , -0.1092459 ]], dtype=float32)>

In [360]:
test_playlist_model.summary(expand_nested=True)

Model: "playlist__model_70"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 pl_name_emb_model (Sequenti  (None, 32)               2368896   
 al)                                                             
|¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯|
| pl_name_txt_vectorizer (Tex  (None, None)           0         |
| tVectorization)                                               |
|                                                               |
| pl_name_emb_layer (Embeddin  (None, None, 32)       2368896   |
| g)                                                            |
|                                                               |
| pl_name_pooling (GlobalAver  (None, 32)             0         |
| agePooling1D)                                                 |
¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
 pl_collaborative_emb_model   (1, 32)           

In [519]:
it = iter(dataset)
print(next(it).numpy())

# this = next(it)

# print(f"Shape of this: {this.shape} \n")
# print(this.numpy())

2022-07-07 03:18:36.468809: E tensorflow/core/framework/dataset.cc:580] UNIMPLEMENTED: Cannot compute input sources for dataset of type IO>BigQueryDataset, because the dataset does not implement `InputDatasets`.
2022-07-07 03:18:36.468871: E tensorflow/core/framework/dataset.cc:584] UNIMPLEMENTED: Cannot merge options for dataset of type IO>BigQueryDataset, because the dataset does not implement `InputDatasets`.
2022-07-07 03:18:36.469407: E tensorflow/core/framework/dataset.cc:580] UNIMPLEMENTED: Cannot compute input sources for dataset of type IO>BigQueryDataset, because the dataset does not implement `InputDatasets`.
2022-07-07 03:18:36.469435: E tensorflow/core/framework/dataset.cc:584] UNIMPLEMENTED: Cannot merge options for dataset of type IO>BigQueryDataset, because the dataset does not implement `InputDatasets`.


StopIteration: 

In [364]:
# test_playlist_model.layers
# test_playlist_model.compile()
test_playlist_model.build((None,32,32,3))

In [365]:
# tf.keras.utils.plot_model(
#     test_playlist_model.build(), 
#     show_shapes=True,
#     show_dtype=True,
#     show_layer_names=True,
#     # rankdir='TB',
#     expand_nested=True,
# )

## Seed Track Tower

In [444]:
for x in dataset.take(1).as_numpy_iterator():
    pprint(x['duration_seed_track'])

2022-07-06 19:01:55.773449: E tensorflow/core/framework/dataset.cc:580] UNIMPLEMENTED: Cannot compute input sources for dataset of type IO>BigQueryDataset, because the dataset does not implement `InputDatasets`.
2022-07-06 19:01:55.773506: E tensorflow/core/framework/dataset.cc:584] UNIMPLEMENTED: Cannot merge options for dataset of type IO>BigQueryDataset, because the dataset does not implement `InputDatasets`.
2022-07-06 19:01:55.773928: E tensorflow/core/framework/dataset.cc:580] UNIMPLEMENTED: Cannot compute input sources for dataset of type IO>BigQueryDataset, because the dataset does not implement `InputDatasets`.
2022-07-06 19:01:55.773956: E tensorflow/core/framework/dataset.cc:584] UNIMPLEMENTED: Cannot merge options for dataset of type IO>BigQueryDataset, because the dataset does not implement `InputDatasets`.


In [454]:
seed_test_instance = {
    'pos_seed_track': np.asarray([45], dtype=np.int64),
    'artist_name_seed_track': np.asarray([b'The Shadowboxers']),
    'artist_uri_seed_track': np.asarray([b'spotify:artist:1zO48bXRrBLAZIxDsMe80S']),
    'track_name_seed_track': np.asarray([b'Ba La Lirkod']),
    'track_uri_seed_track': np.asarray([b'spotify:track:2JuWEcRAVurR3ySo62OEfN']),
    'album_name_seed_track': np.asarray([b'Kaththi (Original Motion Picture Soundtrack)']),
    'album_uri_seed_track': np.asarray([b'spotify:album:2qFqhre2weJ6I4kcVMomtH']),
    'duration_seed_track': np.asarray([291002.0]),
    'track_pop_seed_track': np.asarray([27.0]),
    'artist_pop_seed_track': np.asarray([51.0]),
    'artist_followers_seed_track':np.asarray([29.0]),
    'artist_genres_seed_track': np.asarray([b"'neon pop punk', 'pop punk'"]),
}

pprint(seed_test_instance)

{'album_name_seed_track': array([b'Kaththi (Original Motion Picture Soundtrack)'], dtype='|S44'),
 'album_uri_seed_track': array([b'spotify:album:2qFqhre2weJ6I4kcVMomtH'], dtype='|S36'),
 'artist_followers_seed_track': array([29.]),
 'artist_genres_seed_track': array([b"'neon pop punk', 'pop punk'"], dtype='|S27'),
 'artist_name_seed_track': array([b'The Shadowboxers'], dtype='|S16'),
 'artist_pop_seed_track': array([51.]),
 'artist_uri_seed_track': array([b'spotify:artist:1zO48bXRrBLAZIxDsMe80S'], dtype='|S37'),
 'duration_seed_track': array([291002.]),
 'pos_seed_track': array([45]),
 'track_name_seed_track': array([b'Ba La Lirkod'], dtype='|S12'),
 'track_pop_seed_track': array([27.]),
 'track_uri_seed_track': array([b'spotify:track:2JuWEcRAVurR3ySo62OEfN'], dtype='|S36')}


In [457]:
EMBEDDING_DIM = 32
PROJECTION_DIM = 5
SEED = 1234
USE_CROSS_LAYER=True
DROPOUT='False'
DROPOUT_RATE='0.33'

class Seed_Track_Model(tf.keras.Model):
    def __init__(self, layer_sizes, vocab_dict):
        super().__init__()
        
        # ========================================
        # seed track features
        # ========================================
        
        # Feature: artist_name_seed_track
        self.artist_name_seed_track_text_embedding = tf.keras.Sequential(
            [
                tf.keras.layers.TextVectorization(
                    vocabulary=vocab_dict["artist_name_seed_track"],
                    name="artist_name_seed_track_txt_vectorizer",
                    ngrams=2,
                ),
                tf.keras.layers.Embedding(
                    input_dim=len(vocab_dict["artist_name_seed_track"]),
                    output_dim=EMBEDDING_DIM,
                    mask_zero=False,
                    name="artist_name_seed_track_emb_layer",
                ),
                tf.keras.layers.GlobalAveragePooling1D(name="artist_name_seed_track_pooling"),
            ], name="artist_name_seed_track_emb_model"
        )
        
        # Feature: artist_uri_seed_track
        # self.artist_uri_seed_track_embedding = tf.keras.layers.CategoryEncoding(num_tokens=64, output_mode="int")
        self.artist_uri_seed_track_embedding = tf.keras.Sequential(
            [
                tf.keras.layers.Hashing(num_bins=200_000),
                tf.keras.layers.Embedding(
                    input_dim=len(vocab_dict['artist_uri_seed_track']), 
                    output_dim=EMBEDDING_DIM,
                    name="artist_uri_seed_track_emb_layer",
                ),
            ], name="artist_uri_seed_track_emb_model"
        )
        
        # Feature: track_name_seed_track
        self.track_name_seed_track_text_embedding = tf.keras.Sequential(
            [
                tf.keras.layers.TextVectorization(
                    vocabulary=vocab_dict["track_name_seed_track"],
                    name="track_name_seed_track_txt_vectorizer",
                    ngrams=2,
                ),
                tf.keras.layers.Embedding(
                    input_dim=len(vocab_dict["track_name_seed_track"]),
                    output_dim=EMBEDDING_DIM,
                    mask_zero=False,
                    name="track_name_seed_track_emb_layer",
                ),
                tf.keras.layers.GlobalAveragePooling1D(name="track_name_seed_track_pooling"),
            ], name="track_name_seed_track_emb_model"
        )
        
        # Feature: track_uri_seed_track
        self.track_uri_seed_track_embedding = tf.keras.Sequential(
            [
                tf.keras.layers.Hashing(num_bins=200_000),
                tf.keras.layers.Embedding(
                    input_dim=len(vocab_dict['track_uri_seed_track']), 
                    output_dim=EMBEDDING_DIM,
                    name="track_uri_seed_track_emb_layer",
                ),
            ], name="track_uri_seed_track_emb_model"
        )
        
        # Feature: album_name_seed_track
        self.album_name_seed_track_text_embedding = tf.keras.Sequential(
            [
                tf.keras.layers.TextVectorization(
                    vocabulary=vocab_dict["album_name_seed_track"],
                    name="album_name_seed_track_txt_vectorizer",
                    ngrams=2,
                ),
                tf.keras.layers.Embedding(
                    input_dim=len(vocab_dict["album_name_seed_track"]),
                    output_dim=EMBEDDING_DIM,
                    mask_zero=False,
                    name="album_name_seed_track_emb_layer",
                ),
                tf.keras.layers.GlobalAveragePooling1D(name="album_name_seed_track_pooling"),
            ], name="album_name_seed_track_emb_model"
        )
        
        # Feature: album_uri_seed_track
        self.album_uri_seed_track_embedding = tf.keras.Sequential(
            [
                tf.keras.layers.Hashing(num_bins=200_000),
                tf.keras.layers.Embedding(
                    input_dim=len(vocab_dict['album_uri_seed_track']), 
                    output_dim=EMBEDDING_DIM,
                    name="album_uri_seed_track_emb_layer",
                ),
            ], name="album_uri_seed_track_emb_model"
        )
        
        # Feature: duration_seed_track
        self.duration_seed_track_normalized = tf.keras.layers.Normalization(
            mean=vocab_dict['avg_duration_ms_songs_pl'],
            variance=vocab_dict['var_duration_ms_songs_pl'],
            axis=None
        )
        
        # Feature: track_pop_seed_track
        self.track_pop_seed_track_normalized = tf.keras.layers.Normalization(
            mean=vocab_dict['avg_track_pop'],
            variance=vocab_dict['var_track_pop'],
            axis=None
        )
        
        # Feature: artist_pop_seed_track
        self.artist_pop_seed_track_normalized = tf.keras.layers.Normalization(
            mean=vocab_dict['avg_artist_pop'],
            variance=vocab_dict['var_artist_pop'],
            axis=None
        )
        
        # Feature: artist_followers_seed_track
        self.artist_followers_seed_track_normalized = tf.keras.layers.Normalization(
            mean=vocab_dict['avg_artist_followers'],
            variance=vocab_dict['var_artist_followers'],
            axis=None
        )
        
        # Feature: artist_genres_seed_track
        self.artist_genres_seed_track_text_embedding = tf.keras.Sequential(
            [
                tf.keras.layers.TextVectorization(
                    vocabulary=vocab_dict["artist_genres_seed_track"],
                    name="artist_genres_seed_track_txt_vectorizer",
                    ngrams=2,
                ),
                tf.keras.layers.Embedding(
                    input_dim=len(vocab_dict["artist_genres_seed_track"]),
                    output_dim=EMBEDDING_DIM,
                    mask_zero=False,
                    name="artist_genres_seed_track_emb_layer",
                ),
                tf.keras.layers.GlobalAveragePooling1D(name="artist_genres_seed_track_pooling"),
            ], name="artist_genres_seed_track_emb_model"
        )
        
        
        # ========================================
        # dense and cross layers
        # ========================================

        # Cross Layers
        if USE_CROSS_LAYER:
            self._cross_layer = tfrs.layers.dcn.Cross(
                projection_dim=PROJECTION_DIM,
                kernel_initializer="glorot_uniform", 
                name="seed_track_cross_layer"
            )
        else:
            self._cross_layer = None
            
        # Dense Layer
        self.dense_layers = tf.keras.Sequential(name="seed_track_dense_layers")
        initializer = tf.keras.initializers.GlorotUniform(seed=SEED)
        
        # Use the ReLU activation for all but the last layer.
        for layer_size in layer_sizes[:-1]:
            self.dense_layers.add(
                tf.keras.layers.Dense(
                    layer_size, 
                    activation="relu", 
                    kernel_initializer=initializer,
                )
            )
            if DROPOUT:
                self.dense_layers.add(tf.keras.layers.Dropout(DROPOUT_RATE))
                
        # No activation for the last layer
        for layer_size in layer_sizes[-1:]:
            self.dense_layers.add(
                tf.keras.layers.Dense(
                    layer_size, 
                    kernel_initializer=initializer
                )
            )
        ### ADDING L2 NORM AT THE END
        self.dense_layers.add(
            tf.keras.layers.Lambda(
                lambda x: tf.nn.l2_normalize(
                    x, 1, epsilon=1e-12, name="normalize_dense"
                )
            )
        )

    # ========================================
    # call
    # ========================================
    def call(self, data):
        
        all_embs = tf.concat(
            [
                # data['pos_seed_track']
                self.artist_name_seed_track_text_embedding(data['artist_name_seed_track']),
                self.artist_uri_seed_track_embedding(data['artist_uri_seed_track']),
                self.track_name_seed_track_text_embedding(data['track_name_seed_track']),
                self.track_uri_seed_track_embedding(data["track_uri_seed_track"]),
                self.album_name_seed_track_text_embedding(data["album_name_seed_track"]),
                self.album_uri_seed_track_embedding(data["album_uri_seed_track"]),
                tf.reshape(self.duration_seed_track_normalized(data["duration_seed_track"]), (-1, 1)),
                tf.reshape(self.duration_seed_track_normalized(data["track_pop_seed_track"]), (-1, 1)),
                tf.reshape(self.duration_seed_track_normalized(data["artist_pop_seed_track"]), (-1, 1)),
                tf.reshape(self.artist_followers_seed_track_normalized(data["artist_followers_seed_track"]), (-1, 1)),
                self.artist_genres_seed_track_text_embedding(data["artist_genres_seed_track"]),
            ], axis=1)
        
        # Build Cross Network
        if self._cross_layer is not None:
            cross_embs = self._cross_layer(all_embs)
            return self.dense_layers(cross_embs)
        else:
            return self.dense_layers(all_embs)
        

In [508]:
layer_sizes=[64,32]

test_seed_track_model = Seed_Track_Model(layer_sizes, vocab_dict_load)

seed_result = test_seed_track_model(seed_test_instance)

print(f"Shape of seed_result: {seed_result.shape}")
seed_result

Shape of seed_result: (1, 32)


<tf.Tensor: shape=(1, 32), dtype=float32, numpy=
array([[ 0.4738885 , -0.6975312 , -0.5645372 , -0.30292508,  0.28519604,
         0.27546224, -0.30110165,  1.0748749 ,  0.55002886, -0.15349019,
         0.15894473,  0.00926473, -0.03872087, -0.6707542 , -0.45568797,
        -0.03077737,  0.12979181, -0.08829829,  0.08990557, -0.87537277,
         0.27182013,  0.05736695,  0.36312905, -0.85142   , -0.3251322 ,
         0.21681169, -0.13190483,  0.6076351 ,  0.37525287,  0.5584176 ,
         0.09826156, -0.00232439]], dtype=float32)>

In [None]:
test_can_track_model = Candidate_Track_Model(layer_sizes, vocab_dict_load)

can_result = test_can_track_model(can_test_instance)

print(f"Shape of can_result: {can_result.shape}")
can_result

In [459]:
test_seed_track_model.summary(expand_nested=True)

Model: "seed__track__model_22"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 artist_name_seed_track_emb_  (None, 32)               9206720   
 model (Sequential)                                              
|¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯|
| artist_name_seed_track_txt_  (None, None)           0         |
| vectorizer (TextVectorizati                                   |
| on)                                                           |
|                                                               |
| artist_name_seed_track_emb_  (None, None, 32)       9206720   |
| layer (Embedding)                                             |
|                                                               |
| artist_name_seed_track_pool  (None, 32)             0         |
| ing (GlobalAveragePooling1D                                   |
| )                                          

## Candidate Track Tower

In [460]:
for x in dataset.take(1).as_numpy_iterator():
    pprint(x['artist_name_can'])

2022-07-06 19:20:49.049023: E tensorflow/core/framework/dataset.cc:580] UNIMPLEMENTED: Cannot compute input sources for dataset of type IO>BigQueryDataset, because the dataset does not implement `InputDatasets`.
2022-07-06 19:20:49.049080: E tensorflow/core/framework/dataset.cc:584] UNIMPLEMENTED: Cannot merge options for dataset of type IO>BigQueryDataset, because the dataset does not implement `InputDatasets`.
2022-07-06 19:20:49.049666: E tensorflow/core/framework/dataset.cc:580] UNIMPLEMENTED: Cannot compute input sources for dataset of type IO>BigQueryDataset, because the dataset does not implement `InputDatasets`.
2022-07-06 19:20:49.049713: E tensorflow/core/framework/dataset.cc:584] UNIMPLEMENTED: Cannot merge options for dataset of type IO>BigQueryDataset, because the dataset does not implement `InputDatasets`.


In [490]:
can_test_instance = {
    'artist_name_can': np.asarray([b'Hellogoodbye']),
    'track_name_can': np.asarray([b'When We First Met']),
    'album_name_can': np.asarray([b'Would It Kill You?']),
    'track_uri_can': np.asarray([b'Ba La Lirkod']),
    'artist_uri_can': np.asarray([b'spotify:artist:6GH0NzpthMGxu1mcfAkOde']),
    'album_uri_can': np.asarray([b'spotify:album:4dHXV7pJs6d8N9ACAMzhIw']),
    'duration_ms_can': np.asarray([154813.0]),
    'track_pop_can': np.asarray([45.0]),
    'artist_pop_can': np.asarray([51.0]),
    'artist_followers_can':np.asarray([205331.0]),
    'artist_genres_can': np.asarray([b"'neon pop punk', 'pop punk'"]),
    'test': np.asarray([b'test'])
}

pprint(can_test_instance)

{'album_name_can': array([b'Would It Kill You?'], dtype='|S18'),
 'album_uri_can': array([b'spotify:album:4dHXV7pJs6d8N9ACAMzhIw'], dtype='|S36'),
 'artist_followers_can': array([205331.]),
 'artist_genres_can': array([b"'neon pop punk', 'pop punk'"], dtype='|S27'),
 'artist_name_can': array([b'Hellogoodbye'], dtype='|S12'),
 'artist_pop_can': array([51.]),
 'artist_uri_can': array([b'spotify:artist:6GH0NzpthMGxu1mcfAkOde'], dtype='|S37'),
 'duration_ms_can': array([154813.]),
 'test': array([b'test'], dtype='|S4'),
 'track_name_can': array([b'When We First Met'], dtype='|S17'),
 'track_pop_can': array([45.]),
 'track_uri_can': array([b'Ba La Lirkod'], dtype='|S12')}


In [491]:
EMBEDDING_DIM = 32
PROJECTION_DIM = 5
SEED = 1234
USE_CROSS_LAYER=True
DROPOUT='False'
DROPOUT_RATE='0.33'

class Candidate_Track_Model(tf.keras.Model):
    def __init__(self, layer_sizes, vocab_dict):
        super().__init__()
        
        # ========================================
        # Candidate features
        # ========================================
        
        # Feature: artist_name_can
        self.artist_name_can_text_embedding = tf.keras.Sequential(
            [
                tf.keras.layers.TextVectorization(
                    vocabulary=vocab_dict["artist_name_can"],
                    name="artist_name_can_txt_vectorizer",
                    ngrams=2,
                ),
                tf.keras.layers.Embedding(
                    input_dim=len(vocab_dict["artist_name_can"]),
                    output_dim=EMBEDDING_DIM,
                    mask_zero=False,
                    name="artist_name_can_emb_layer",
                ),
                tf.keras.layers.GlobalAveragePooling1D(name="artist_name_can_pooling"),
            ], name="artist_name_can_emb_model"
        )
        
        # Feature: track_name_can
        self.track_name_can_text_embedding = tf.keras.Sequential(
            [
                tf.keras.layers.TextVectorization(
                    vocabulary=vocab_dict["track_name_can"],
                    name="track_name_can_txt_vectorizer",
                    ngrams=2,
                ),
                tf.keras.layers.Embedding(
                    input_dim=len(vocab_dict["track_name_can"]),
                    output_dim=EMBEDDING_DIM,
                    mask_zero=False,
                    name="track_name_can_emb_layer",
                ),
                tf.keras.layers.GlobalAveragePooling1D(name="track_name_can_pooling"),
            ], name="track_name_can_emb_model"
        )
        
        # Feature: album_name_can
        self.album_name_can_text_embedding = tf.keras.Sequential(
            [
                tf.keras.layers.TextVectorization(
                    vocabulary=vocab_dict["album_name_can"],
                    name="album_name_can_txt_vectorizer",
                    ngrams=2,
                ),
                tf.keras.layers.Embedding(
                    input_dim=len(vocab_dict["album_name_can"]),
                    output_dim=EMBEDDING_DIM,
                    mask_zero=False,
                    name="album_name_can_emb_layer",
                ),
                tf.keras.layers.GlobalAveragePooling1D(name="album_name_can_pooling"),
            ], name="album_name_can_emb_model"
        )
        
        # Feature: artist_uri_can
        self.artist_uri_can_embedding = tf.keras.Sequential(
            [
                tf.keras.layers.Hashing(num_bins=200_000),
                tf.keras.layers.Embedding(
                    input_dim=len(vocab_dict['artist_uri_can']), 
                    output_dim=EMBEDDING_DIM,
                    name="artist_uri_can_emb_layer",
                ),
            ], name="artist_uri_can_emb_model"
        )
        
        # Feature: track_uri_can
        self.track_uri_can_embedding = tf.keras.Sequential(
            [
                tf.keras.layers.Hashing(num_bins=200_000),
                tf.keras.layers.Embedding(
                    input_dim=len(vocab_dict['track_uri_can']), 
                    output_dim=EMBEDDING_DIM,
                    name="track_uri_can_emb_layer",
                ),
            ], name="track_uri_can_emb_model"
        )
        
        # Feature: album_uri_can
        self.album_uri_can_embedding = tf.keras.Sequential(
            [
                tf.keras.layers.Hashing(num_bins=200_000),
                tf.keras.layers.Embedding(
                    input_dim=len(vocab_dict['album_uri_can']), 
                    output_dim=EMBEDDING_DIM,
                    name="album_uri_can_emb_layer",
                ),
            ], name="album_uri_can_emb_model"
        )
        
        # Feature: duration_ms_can
        self.duration_ms_can_normalized = tf.keras.layers.Normalization(
            mean=vocab_dict['avg_duration_ms_songs_pl'],
            variance=vocab_dict['var_duration_ms_songs_pl'],
            axis=None
        )
        
        # Feature: track_pop_can
        self.track_pop_can_normalized = tf.keras.layers.Normalization(
            mean=vocab_dict['avg_track_pop'],
            variance=vocab_dict['var_track_pop'],
            axis=None
        )
        
        # Feature: artist_pop_can
        self.artist_pop_can_normalized = tf.keras.layers.Normalization(
            mean=vocab_dict['avg_artist_pop'],
            variance=vocab_dict['var_artist_pop'],
            axis=None
        )
        
        # Feature: artist_followers_can
        self.artist_followers_can_normalized = tf.keras.layers.Normalization(
            mean=vocab_dict['avg_artist_followers'],
            variance=vocab_dict['var_artist_followers'],
            axis=None
        )
        
        # Feature: artist_genres_can
        self.artist_genres_can_text_embedding = tf.keras.Sequential(
            [
                tf.keras.layers.TextVectorization(
                    vocabulary=vocab_dict["artist_genres_can"],
                    name="artist_genres_can_txt_vectorizer",
                    ngrams=2,
                ),
                tf.keras.layers.Embedding(
                    input_dim=len(vocab_dict["artist_genres_can"]),
                    output_dim=EMBEDDING_DIM,
                    mask_zero=False,
                    name="artist_genres_can_emb_layer",
                ),
                tf.keras.layers.GlobalAveragePooling1D(name="artist_genres_can_pooling"),
            ], name="artist_genres_can_emb_model"
        )
        
        # ========================================
        # Dense & Cross Layers
        # ========================================
        
        # Cross Layers
        
        # Dense Layer
        self.dense_layers = tf.keras.Sequential(name="candidate_dense_layers")
        initializer = tf.keras.initializers.GlorotUniform(seed=SEED)
        
        # Use the ReLU activation for all but the last layer.
        for layer_size in layer_sizes[:-1]:
            self.dense_layers.add(
                tf.keras.layers.Dense(
                    layer_size, 
                    activation="relu", 
                    kernel_initializer=initializer,
                )
            )
            if DROPOUT:
                self.dense_layers.add(tf.keras.layers.Dropout(DROPOUT_RATE))
                
        # No activation for the last layer
        for layer_size in layer_sizes[-1:]:
            self.dense_layers.add(
                tf.keras.layers.Dense(
                    layer_size, 
                    kernel_initializer=initializer
                )
            )
            
    # ========================================
    # Call Function
    # ========================================
            
    def call(self, data):
        
        all_embs = tf.concat(
            [
                self.artist_name_can_text_embedding(data['artist_name_can']),
                self.track_name_can_text_embedding(data['track_name_can']),
                self.album_name_can_text_embedding(data['album_name_can']),
                self.artist_uri_can_embedding(data['artist_uri_can']),
                self.track_uri_can_embedding(data['track_uri_can']),
                self.album_uri_can_embedding(data['album_uri_can']),
                tf.reshape(self.duration_ms_can_normalized(data["duration_ms_can"]), (-1, 1)),
                tf.reshape(self.track_pop_can_normalized(data["track_pop_can"]), (-1, 1)),
                tf.reshape(self.artist_pop_can_normalized(data["artist_pop_can"]), (-1, 1)),
                tf.reshape(self.artist_followers_can_normalized(data["artist_followers_can"]), (-1, 1)),
                self.artist_genres_can_text_embedding(data['album_uri_can']),
            ], axis=1
        )
        
        return self.dense_layers(all_embs)

In [506]:
layer_sizes=[64,32]

test_can_track_model = Candidate_Track_Model(layer_sizes, vocab_dict_load)

can_result = test_can_track_model(can_test_instance)

print(f"Shape of can_result: {can_result.shape}")
can_result

Shape of can_result: (1, 32)


<tf.Tensor: shape=(1, 32), dtype=float32, numpy=
array([[-0.64103854,  0.17936414, -0.1597794 , -0.23482801,  0.04011448,
         0.06539779, -0.07605655, -0.17988613,  0.06396451,  0.28101248,
        -0.43454924,  0.06150659, -0.1915848 , -0.11559332,  0.00649117,
         0.04989069,  0.18501326, -0.01549801, -0.16110197,  0.15586206,
         0.16208774,  0.26024503,  0.16025694,  0.12186014, -0.04252896,
        -0.01786662,  0.06143963, -0.39211723, -0.15657601,  0.29838738,
         0.02947132, -0.07277929]], dtype=float32)>

In [489]:
test_can_track_model.summary(expand_nested=True)

Model: "candidate__track__model_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 artist_name_can_emb_model (  (None, 32)               9206720   
 Sequential)                                                     
|¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯|
| artist_name_can_txt_vectori  (None, None)           0         |
| zer (TextVectorization)                                       |
|                                                               |
| artist_name_can_emb_layer (  (None, None, 32)       9206720   |
| Embedding)                                                    |
|                                                               |
| artist_name_can_pooling (Gl  (None, 32)             0         |
| obalAveragePooling1D)                                         |
¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
 track_name_can_emb_model (S  (None, 32) 

## Combined Model

In [510]:
# pl_result
# seed_result
# can_result

pl_can_concat = tf.concat([pl_result,can_result], axis=1)

print(f"Shape of pl_can_concat: {pl_can_concat.shape[1]}")
pl_can_concat

Shape of pl_can_concat: 64


<tf.Tensor: shape=(1, 64), dtype=float32, numpy=
array([[ 0.05321983, -0.04084665,  0.11637767,  0.3411542 ,  0.09852977,
        -0.06006141,  0.14619492,  0.24855876, -0.12215702, -0.15610446,
        -0.08795111, -0.27013752, -0.04377728, -0.12331408,  0.09523007,
        -0.13910483,  0.09044929, -0.19123396,  0.14647555, -0.3671819 ,
         0.19767866,  0.03310812,  0.12395252,  0.14589746,  0.18691422,
        -0.23982897,  0.08464528, -0.00168197,  0.38199148,  0.10042445,
        -0.2411324 , -0.1092459 , -0.64103854,  0.17936414, -0.1597794 ,
        -0.23482801,  0.04011448,  0.06539779, -0.07605655, -0.17988613,
         0.06396451,  0.28101248, -0.43454924,  0.06150659, -0.1915848 ,
        -0.11559332,  0.00649117,  0.04989069,  0.18501326, -0.01549801,
        -0.16110197,  0.15586206,  0.16208774,  0.26024503,  0.16025694,
         0.12186014, -0.04252896, -0.01786662,  0.06143963, -0.39211723,
        -0.15657601,  0.29838738,  0.02947132, -0.07277929]],
      dtype=f

In [511]:
sd_can_concat = tf.concat([seed_result,can_result], axis=1)

print(f"Shape of sd_can_concat: {sd_can_concat.shape[1]}")
sd_can_concat

Shape of sd_can_concat: 64


<tf.Tensor: shape=(1, 64), dtype=float32, numpy=
array([[ 0.4738885 , -0.6975312 , -0.5645372 , -0.30292508,  0.28519604,
         0.27546224, -0.30110165,  1.0748749 ,  0.55002886, -0.15349019,
         0.15894473,  0.00926473, -0.03872087, -0.6707542 , -0.45568797,
        -0.03077737,  0.12979181, -0.08829829,  0.08990557, -0.87537277,
         0.27182013,  0.05736695,  0.36312905, -0.85142   , -0.3251322 ,
         0.21681169, -0.13190483,  0.6076351 ,  0.37525287,  0.5584176 ,
         0.09826156, -0.00232439, -0.64103854,  0.17936414, -0.1597794 ,
        -0.23482801,  0.04011448,  0.06539779, -0.07605655, -0.17988613,
         0.06396451,  0.28101248, -0.43454924,  0.06150659, -0.1915848 ,
        -0.11559332,  0.00649117,  0.04989069,  0.18501326, -0.01549801,
        -0.16110197,  0.15586206,  0.16208774,  0.26024503,  0.16025694,
         0.12186014, -0.04252896, -0.01786662,  0.06143963, -0.39211723,
        -0.15657601,  0.29838738,  0.02947132, -0.07277929]],
      dtype=f

In [502]:
class MultiTowerModel(tfrs.models.Model):

    def __init__(self, layer_sizes, vocab_dict_load, Seed_Candidate_Weight: float, Playlist_Candidate_Weight: float):
        super().__init__()
        self.playlist_tower = Playlist_Model(layer_sizes, vocab_dict_load)
        self.seed_track_tower = Seed_Track_Model(layer_sizes, vocab_dict_load)
        self.shared_candidate_tower = Candidate_Track_Model(layer_sizes, vocab_dict_load)
        
        initializer = tf.keras.initializers.GlorotUniform(seed=SEED)
        
        # =============================
        # Playlist-Candidate subtower
        # =============================
        '''
        a sub-model to take (1) Playlist and (2) Candidate features
        and retrieve 
        '''
        
        self.Playlist_Candidate_Subtower = tf.keras.Sequential(name="playlist-candidate-subtower")
        # Use the ReLU activation for all but the last layer.
        for layer_size in layer_sizes[:-1]:
            self.Playlist_Candidate_Subtower.add(
                tf.keras.layers.Dense(
                    layer_size, 
                    activation="relu", 
                    kernel_initializer=initializer,
                )
            )
            if DROPOUT:
                self.Playlist_Candidate_Subtower.add(tf.keras.layers.Dropout(DROPOUT_RATE))
                
        # No activation for the last layer
        for layer_size in layer_sizes[-1:]:
            self.Playlist_Candidate_Subtower.add(
                tf.keras.layers.Dense(
                    layer_size, 
                    kernel_initializer=initializer
                )
            )
        
        # =============================
        # Seed-Candidate subtower
        # =============================
        
        self.Seed_Candidate_Subtower = tf.keras.Sequential(name="seed-candidate-subtower")
        # Use the ReLU activation for all but the last layer.
        for layer_size in layer_sizes[:-1]:
            self.Seed_Candidate_Subtower.add(
                tf.keras.layers.Dense(
                    layer_size, 
                    activation="relu", 
                    kernel_initializer=initializer,
                )
            )
            if DROPOUT:
                self.Seed_Candidate_Subtower.add(tf.keras.layers.Dropout(DROPOUT_RATE))
                
        # No activation for the last layer
        for layer_size in layer_sizes[-1:]:
            self.Seed_Candidate_Subtower.add(
                tf.keras.layers.Dense(
                    layer_size, 
                    kernel_initializer=initializer
                )
            )
        
        # =============================
        # Retrieval Tasks
        # =============================
              
        self.Playlist_Candidate_Task = tfrs.tasks.Retrieval(
            metrics=tfrs.metrics.FactorizedTopK(
                candidates=dataset.batch(128).cache().map(self.Playlist_Candidate_Subtower)
            )
        )
        
        self.Seed_Candidate_Task = tfrs.tasks.Retrieval(
            metrics=tfrs.metrics.FactorizedTopK(
                candidates=dataset.batch(128).cache().map(self.Seed_Candidate_Subtower)
            )
        )
        
        # =============================
        # Loss Weights
        # =============================
        self.Playlist_Candidate_Weight = Playlist_Candidate_Weight
        self.Seed_Candidate_Weight = Seed_Candidate_Weight
        
    # Call
    def call(self, data):
        # get all embeddings
        playlist_embeddings = self.playlist_tower(data)
        seed_track_embeddings = self.seed_track_tower(data)
        candidate_embeddings= self.shared_candidate_tower(data)
        
        # Apply the multi-layered DNN model to a concat of 
        # Playlist and Candidate features
        Playlist_Candidate_Embeddings = self.Playlist_Candidate_Subtower(
            tf.concat([playlist_embeddings, candidate_embeddings], axis=1)
        )
       
        # Apply the multi-layered DNN model to a concat of 
        # Seed_Track and Candidate features
        Seed_Candidate_Embeddings = self.Seed_Candidate_Subtower(
            tf.concat([seed_track_embeddings, candidate_embeddings], axis=1)
        )
        
        return (
            playlist_embeddings,
            seed_track_embeddings,
            candidate_embeddings,
            Playlist_Candidate_Embeddings,
            Seed_Candidate_Embeddings,
        )
        
        
    def compute_loss(self, data, training=False):
        
        playlist_embeddings, seed_track_embeddings, candidate_embeddings, _, _ = self(data)
        
        playlist_candidate_loss = self.Playlist_Candidate_Task(
            playlist_embeddings, candidate_embeddings, compute_metrics=not training)
        
        seed_candidate_loss = self.Seed_Candidate_Task(
            seed_track_embeddings, candidate_embeddings, compute_metrics=not training)

        return (
            self.Playlist_Candidate_Weight * playlist_candidate_loss 
            + self.Seed_Candidate_Weight * seed_candidate_loss
        )

In [503]:
# self.playlist_tower = Playlist_Model(layer_sizes, vocab_dict_load)
# self.seed_track_tower = Seed_Track_Model(layer_sizes, vocab_dict_load)
# self.shared_candidate_tower = Candidate_Track_Model(layer_sizes, vocab_dict_load)

In [512]:
layer_sizes=[64,32]
seed_playlist_weight = 1.0
playlist_candidate_weight = 1.0
LEARNING_RATE = 0.1

EMBEDDING_DIM = 32
PROJECTION_DIM = 5
SEED = 1234
USE_CROSS_LAYER=False
DROPOUT='False'
DROPOUT_RATE='0.33'

model = MultiTowerModel(
    layer_sizes, 
    vocab_dict_load,
    Seed_Candidate_Weight = 1.0, 
    Playlist_Candidate_Weight = 1.0,
)

model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=LEARNING_RATE))



ValueError: Exception encountered when calling layer "playlist-candidate-subtower" (type Sequential).

Layer "dense_152" expects 1 input(s), but it received 41 input tensors. Inputs received: [<tf.Tensor 'args_0:0' shape=(None, None) dtype=string>, <tf.Tensor 'args_1:0' shape=(None, None, None) dtype=string>, <tf.Tensor 'args_2:0' shape=(None, None) dtype=string>, <tf.Tensor 'args_3:0' shape=(None, None) dtype=string>, <tf.Tensor 'args_4:0' shape=(None, None) dtype=string>, <tf.Tensor 'args_5:0' shape=(None, None) dtype=float64>, <tf.Tensor 'args_6:0' shape=(None, None) dtype=float64>, <tf.Tensor 'args_7:0' shape=(None, None) dtype=string>, <tf.Tensor 'args_8:0' shape=(None, None, None) dtype=string>, <tf.Tensor 'args_9:0' shape=(None, None) dtype=string>, <tf.Tensor 'args_10:0' shape=(None, None) dtype=string>, <tf.Tensor 'args_11:0' shape=(None, None, None) dtype=string>, <tf.Tensor 'args_12:0' shape=(None, None) dtype=string>, <tf.Tensor 'args_13:0' shape=(None, None) dtype=float64>, <tf.Tensor 'args_14:0' shape=(None, None, None) dtype=float64>, <tf.Tensor 'args_15:0' shape=(None, None) dtype=float64>, <tf.Tensor 'args_16:0' shape=(None, None) dtype=string>, <tf.Tensor 'args_17:0' shape=(None, None) dtype=string>, <tf.Tensor 'args_18:0' shape=(None, None, None) dtype=float64>, <tf.Tensor 'args_19:0' shape=(None, None) dtype=string>, <tf.Tensor 'args_20:0' shape=(None, None) dtype=string>, <tf.Tensor 'args_21:0' shape=(None, None) dtype=float64>, <tf.Tensor 'args_22:0' shape=(None, None) dtype=float64>, <tf.Tensor 'args_23:0' shape=(None, None, None) dtype=float64>, <tf.Tensor 'args_24:0' shape=(None, None) dtype=float64>, <tf.Tensor 'args_25:0' shape=(None, None) dtype=float64>, <tf.Tensor 'args_26:0' shape=(None, None) dtype=string>, <tf.Tensor 'args_27:0' shape=(None, None) dtype=float64>, <tf.Tensor 'args_28:0' shape=(None, None) dtype=float64>, <tf.Tensor 'args_29:0' shape=(None, None) dtype=int64>, <tf.Tensor 'args_30:0' shape=(None, None) dtype=int64>, <tf.Tensor 'args_31:0' shape=(None, None) dtype=int64>, <tf.Tensor 'args_32:0' shape=(None, None) dtype=string>, <tf.Tensor 'args_33:0' shape=(None, None, None) dtype=string>, <tf.Tensor 'args_34:0' shape=(None, None) dtype=string>, <tf.Tensor 'args_35:0' shape=(None, None) dtype=float64>, <tf.Tensor 'args_36:0' shape=(None, None, None) dtype=int64>, <tf.Tensor 'args_37:0' shape=(None, None) dtype=float64>, <tf.Tensor 'args_38:0' shape=(None, None) dtype=string>, <tf.Tensor 'args_39:0' shape=(None, None, None) dtype=string>, <tf.Tensor 'args_40:0' shape=(None, None) dtype=string>]

Call arguments received by layer "playlist-candidate-subtower" (type Sequential):
  • inputs=OrderedDict([('album_name_can', 'tf.Tensor(shape=(None, None), dtype=string)'), ('album_name_pl', 'tf.Tensor(shape=(None, None, None), dtype=string)'), ('album_name_seed_track', 'tf.Tensor(shape=(None, None), dtype=string)'), ('album_uri_can', 'tf.Tensor(shape=(None, None), dtype=string)'), ('album_uri_seed_track', 'tf.Tensor(shape=(None, None), dtype=string)'), ('artist_followers_can', 'tf.Tensor(shape=(None, None), dtype=float64)'), ('artist_followers_seed_track', 'tf.Tensor(shape=(None, None), dtype=float64)'), ('artist_genres_can', 'tf.Tensor(shape=(None, None), dtype=string)'), ('artist_genres_pl', 'tf.Tensor(shape=(None, None, None), dtype=string)'), ('artist_genres_seed_track', 'tf.Tensor(shape=(None, None), dtype=string)'), ('artist_name_can', 'tf.Tensor(shape=(None, None), dtype=string)'), ('artist_name_pl', 'tf.Tensor(shape=(None, None, None), dtype=string)'), ('artist_name_seed_track', 'tf.Tensor(shape=(None, None), dtype=string)'), ('artist_pop_can', 'tf.Tensor(shape=(None, None), dtype=float64)'), ('artist_pop_pl', 'tf.Tensor(shape=(None, None, None), dtype=float64)'), ('artist_pop_seed_track', 'tf.Tensor(shape=(None, None), dtype=float64)'), ('artist_uri_can', 'tf.Tensor(shape=(None, None), dtype=string)'), ('artist_uri_seed_track', 'tf.Tensor(shape=(None, None), dtype=string)'), ('artists_followers_pl', 'tf.Tensor(shape=(None, None, None), dtype=float64)'), ('collaborative', 'tf.Tensor(shape=(None, None), dtype=string)'), ('description_pl', 'tf.Tensor(shape=(None, None), dtype=string)'), ('duration_ms_can', 'tf.Tensor(shape=(None, None), dtype=float64)'), ('duration_ms_seed_pl', 'tf.Tensor(shape=(None, None), dtype=float64)'), ('duration_ms_songs_pl', 'tf.Tensor(shape=(None, None, None), dtype=float64)'), ('duration_seed_track', 'tf.Tensor(shape=(None, None), dtype=float64)'), ('n_songs_pl', 'tf.Tensor(shape=(None, None), dtype=float64)'), ('name', 'tf.Tensor(shape=(None, None), dtype=string)'), ('num_albums_pl', 'tf.Tensor(shape=(None, None), dtype=float64)'), ('num_artists_pl', 'tf.Tensor(shape=(None, None), dtype=float64)'), ('pid', 'tf.Tensor(shape=(None, None), dtype=int64)'), ('pos_can', 'tf.Tensor(shape=(None, None), dtype=int64)'), ('pos_seed_track', 'tf.Tensor(shape=(None, None), dtype=int64)'), ('track_name_can', 'tf.Tensor(shape=(None, None), dtype=string)'), ('track_name_pl', 'tf.Tensor(shape=(None, None, None), dtype=string)'), ('track_name_seed_track', 'tf.Tensor(shape=(None, None), dtype=string)'), ('track_pop_can', 'tf.Tensor(shape=(None, None), dtype=float64)'), ('track_pop_pl', 'tf.Tensor(shape=(None, None, None), dtype=int64)'), ('track_pop_seed_track', 'tf.Tensor(shape=(None, None), dtype=float64)'), ('track_uri_can', 'tf.Tensor(shape=(None, None), dtype=string)'), ('track_uri_pl', 'tf.Tensor(shape=(None, None, None), dtype=string)'), ('track_uri_seed_track', 'tf.Tensor(shape=(None, None), dtype=string)')])
  • training=False
  • mask=None

#### References
* [multi-head_and torso: Dugtrio code](https://source.corp.google.com/piper///depot/google3/learning/brain/models/recommendations/dugtrio/torso/multi_head_torso.py)
* [CPU-optimized sparse implementation of attention](https://source.corp.google.com/piper///depot/google3/ads/ubaq/brain_embeddings/tensorflow/layers.py;l=1234;rcl=202730546?q=layers.py%20f:brain_embeddings&dr=)

#### Notes
* `tf.feature_column.sequence_categorical_column_with_identity` - "Pass this to embedding_column or indicator_column to convert sequence categorical data into dense representation for input to sequence NN, such as RN" [docs](https://www.tensorflow.org/api_docs/python/tf/feature_column/sequence_categorical_column_with_identity)
* Implementing a sequential model with Two Tower [docs](https://www.tensorflow.org/recommenders/examples/sequential_retrieval#implementing_a_sequential_model)