# Build baseline tfrs model 

Look inside of `two_tower_src/` for the source code and model code

This notebook constructs the two tower model and saves the model to GCS

We will use managed Tensorboard for training. Before beginning, create a new Tensorboard instance by going to Vertex -> Experiments -> Tensorboard Instances -> Create

![](img/create-a-tb.png)

In [1]:
# !pip install tensorflow-recommenders==0.6.0 --user

In [2]:
# Notebook-wide configuration. PROJECT_ID and LOCATION are not referenced by the
# cells visible in this notebook; `path` is the GCS root under which the
# TensorBoard logs (`tb-logs/`) and the exported towers are written.
PROJECT_ID = 'hybrid-vertex'  # <--- TODO: CHANGE THIS
LOCATION = 'us-central1' 
path = 'gs://two-tower-models' # TODO: change to your model directory (gs://<bucket>[/<prefix>], no trailing slash)

In [3]:
import os

# Both variables are set here, before TensorFlow is imported in the next cell.
# NOTE(review): 'gpu_private' is the thread mode recommended by the TensorFlow
# GPU performance guide (dedicated threads for GPU kernel launches) - confirm.
os.environ['TF_GPU_THREAD_MODE']='gpu_private'
# Selects the CUDA malloc Async allocator; TensorFlow logs
# "Using CUDA malloc Async allocator for GPU" at start-up when it is active.
os.environ['TF_GPU_ALLOCATOR']='cuda_malloc_async'

In [4]:
import json

import tensorflow as tf
import logging
import time

import tensorflow_recommenders as tfrs


from google.cloud import storage

from two_tower_src import two_tower as tt
#inside this tt module the data parsing functions, candidate dataset and model classes are found

2022-10-11 13:15:44.577544: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-10-11 13:15:45.215830: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:214] Using CUDA malloc Async allocator for GPU: 0
2022-10-11 13:15:45.216081: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 38238 MB memory:  -> device: 0, name: NVIDIA A100-SXM4-40GB, pci bus id: 0000:00:04.0, compute capability: 8.0


## Create Dataset for local training and testing

Inspect the contents of the directory - you can change parameters in the header of the `two_tower.py` script

In [5]:
!tree two_tower_src

[01;34mtwo_tower_src[00m
├── __init__.py
├── [01;34m__pycache__[00m
│   ├── __init__.cpython-37.pyc
│   └── two_tower.cpython-37.pyc
└── two_tower.py

1 directory, 4 files


### Playlist dataset

In [6]:
batch_size = 40000
train_dir = 'spotify-beam-v3'
train_dir_prefix = 'v6/train_last_5_v2/'

valid_dir = 'spotify-beam-v3'
valid_dir_prefix = 'v6/valid_last_5/'

client = storage.Client()
from google.cloud import aiplatform as vertex_ai


options = tf.data.Options()
options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.AUTO


def list_gcs_files(bucket_name, prefix):
    """Return the gs:// URI of every object directly under `prefix` in `bucket_name`.

    URIs are built from the raw object name. `Blob.public_url` percent-encodes
    the name, so deriving the URI from it produces a path that does not exist
    for object names containing characters such as spaces.

    Raises:
        ValueError: if nothing is listed, so the problem is reported here
            rather than as a dtype error when the dataset is built.
    """
    files = [
        f"gs://{bucket_name}/{blob.name}"
        for blob in client.list_blobs(bucket_name, prefix=prefix, delimiter="/")
    ]
    if not files:
        raise ValueError(f"No files found under gs://{bucket_name}/{prefix}")
    return files


def full_parse(data):
    """Used for interleave - takes a tensor of file name(s) and returns a TFRecordDataset."""
    return tf.data.TFRecordDataset(data)


def build_dataset(files):
    """Build the batched input pipeline shared by training and validation.

    File names are interleaved into TFRecord readers (non-deterministic order),
    each record is passed through `tt.parse_tfrecord`, and the result is batched
    with `batch_size`, prefetched and tagged with the notebook-level `options`.
    """
    return (
        tf.data.Dataset.from_tensor_slices(files)
        .prefetch(tf.data.AUTOTUNE)
        .interleave(
            full_parse,
            cycle_length=tf.data.AUTOTUNE,
            num_parallel_calls=tf.data.AUTOTUNE,
            deterministic=False,
        )
        .map(tt.parse_tfrecord, num_parallel_calls=tf.data.AUTOTUNE)
        .batch(batch_size)
        .prefetch(tf.data.AUTOTUNE)
        .with_options(options)
    )


train_files = list_gcs_files(train_dir, train_dir_prefix)
train_dataset = build_dataset(train_files)

valid_files = list_gcs_files(valid_dir, valid_dir_prefix)
valid_dataset = build_dataset(valid_files)

# Local Training

Compile the model
Review the details of the model layers

In [7]:
# Layer sizes handed to the two-tower model constructor.
layer_sizes = [256, 128]
model = tt.TheTwoTowers(layer_sizes)

# Adagrad with a learning rate of 0.01.
adagrad = tf.keras.optimizers.Adagrad(learning_rate=0.01)
model.compile(optimizer=adagrad)

In [8]:
## Quick look at the layers
# Print each sub-layer of the query tower with its position.
print("Playlist (query) Tower:")

for layer_idx, layer in enumerate(model.query_tower.layers):
    print(layer_idx, layer.name)

Playlist (query) Tower:
0 pl_name_emb_model
1 pl_collaborative_emb_model
2 pl_track_uri_emb_model
3 n_songs_pl_emb_model
4 n_artists_pl_emb_model
5 n_albums_pl_emb_model
6 artist_name_pl_emb_model
7 track_uri_pl_emb_model
8 track_name_pl_emb_model
9 duration_ms_songs_pl_emb_model
10 album_name_pl_emb_model
11 artist_pop_pl_emb_model
12 artists_followers_pl_emb_model
13 track_pop_pl_emb_model
14 artist_genres_pl_emb_model
15 pl_cross_layer
16 pl_dense_layers


In [9]:
# Print each sub-layer of the candidate tower with its position.
print("Track (candidate) Tower:")
for layer_idx, layer in enumerate(model.candidate_tower.layers):
    print(layer_idx, layer.name)

Track (candidate) Tower:
0 artist_name_can_emb_model
1 track_name_can_emb_model
2 album_name_can_emb_model
3 artist_uri_can_emb_model
4 track_uri_can_emb_model
5 album_uri_can_emb_model
6 duration_ms_can_normalized
7 track_pop_can_normalized
8 artist_pop_can_normalized
9 artist_followers_can_normalized
10 artist_genres_can_emb_model
11 can_cross_layer
12 candidate_dense_layers


### Local training for nine epochs

Setup tensorboard below so training is visible and we can inspect the graph

In [10]:
# Managed TensorBoard instance that the upload command targets.
# TODO: replace with the resource name of the instance you created.
TB_RESOURCE_NAME = 'projects/934903580331/locations/us-central1/tensorboards/7336372589079560192' #fqn - project number then tensorboard id
# Timestamp taken when this cell runs; it is embedded in the experiment name,
# so re-running the cell starts a new experiment / log directory.
invoke_time = time.strftime("%Y%m%d-%H%M%S")
EXPERIMENT_NAME = f'spotify-singe-node-train-full-data-{invoke_time}'
# GCS directory for this run's TensorBoard logs, under the notebook-level `path`.
LOG_DIR = path+"/tb-logs/"+EXPERIMENT_NAME


def get_upload_logs_to_manged_tb_command():
    """
    Print a ready-to-paste `tb-gcp-uploader` command.

    Running the printed command in a terminal uploads the TensorBoard logs in
    LOG_DIR to the managed TensorBoard instance TB_RESOURCE_NAME under
    EXPERIMENT_NAME. Nothing is uploaded by this function itself.

    The backslash-newline pairs inside the template are consumed by Python,
    so the command is printed on a single line.
    """
    upload_help = f"""Helper for copy/past TF log upload command:

    tb-gcp-uploader --tensorboard_resource_name={TB_RESOURCE_NAME} \
      --logdir={LOG_DIR} \
      --experiment_name={EXPERIMENT_NAME} --one_shot=False
    """
    print(upload_help)

In [11]:
# Reuse LOG_DIR rather than rebuilding the same string, so the callback writes
# to exactly the directory that the printed upload command reads from.
logs_dir = LOG_DIR

tensorboard_callback = tf.keras.callbacks.TensorBoard(
    log_dir=logs_dir,
    histogram_freq=0,        # 0 disables histogram computation
    write_graph=True,
    profile_batch=(20, 50),  # run the profiler on batches 20 through 50
)

# Print the command that uploads these logs to the managed TensorBoard instance.
get_upload_logs_to_manged_tb_command()

2022-10-11 13:15:58.205900: I tensorflow/core/profiler/lib/profiler_session.cc:110] Profiler session initializing.
2022-10-11 13:15:58.205943: I tensorflow/core/profiler/lib/profiler_session.cc:125] Profiler session started.
2022-10-11 13:15:58.206044: I tensorflow/core/profiler/internal/gpu/cupti_tracer.cc:1630] Profiler found 1 GPUs
2022-10-11 13:15:58.207221: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcupti.so.11.2'; dlerror: libcupti.so.11.2: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda/lib64:/usr/local/nccl2/lib:/usr/local/cuda/extras/CUPTI/lib64


Helper for copy/past TF log upload command:

    tb-gcp-uploader --tensorboard_resource_name=projects/934903580331/locations/us-central1/tensorboards/7336372589079560192       --logdir=gs://two-tower-models/tb-logs/spotify-singe-node-train-full-data-20221011-131552       --experiment_name=spotify-singe-node-train-full-data-20221011-131552 --one_shot=False
    


2022-10-11 13:15:58.387966: I tensorflow/core/profiler/lib/profiler_session.cc:143] Profiler session tear down.
2022-10-11 13:15:58.388195: I tensorflow/core/profiler/internal/gpu/cupti_tracer.cc:1764] CUPTI activity buffer flushed


### Training using tensorboard callback

While profiling does not work for managed Tensorboard at this time, you can inspect the profiler with an [inline Tensorboard in another notebook](https://www.tensorflow.org/tensorboard/tensorboard_in_notebooks). You may be prompted to install the tensorflow profiler library

In [None]:
# Number of passes over the training dataset.
NUM_EPOCHS = 9


# Wall-clock timestamps taken immediately before and after fit(); a later cell
# subtracts them to report the total training time.
start_time = time.time()
layer_history = model.fit(
    train_dataset,
    validation_data=valid_dataset,
    validation_freq=3,  # evaluate on valid_dataset every 3rd epoch only
    epochs=NUM_EPOCHS,
    # steps_per_epoch=2, #use this for development to run just a few steps
    callbacks=[tensorboard_callback],
    verbose=0  # silent; follow progress in TensorBoard instead
)
end_time = time.time()

2022-10-11 13:16:12.237968: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.
2022-10-11 13:16:12.238019: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.
2022-10-11 13:16:27.430554: I tensorflow/core/profiler/lib/profiler_session.cc:110] Profiler session initializing.
2022-10-11 13:16:27.430612: I tensorflow/core/profiler/lib/profiler_session.cc:125] Profiler session started.
2022-10-11 13:17:09.873017: I tensorflow/core/profiler/lib/profiler_session.cc:67] Profiler session collecting data.
2022-10-11 13:17:10.247155: I tensorflow/core/profiler/internal/gpu/cupti_tracer.cc:1764] CUPTI activity buffer flushed
2022-10-11 13:17:35.590855: I tensorflow/core/profiler/internal/gpu/cupti_collector.cc:521]  GpuTracer has collected 44859 callback api events and 44828 activity events. 
2022-10-11 13:17:56.31628

Open up the terminal and run the command from above - here's an example:

![](img/upload-tb-logs.png)

#### **Notice the link to the managed tensorboard**

You can also access the experiment from the console via the name you just declared:

![](img/experiment-console.png)

![](img/tensorboard.png)

### Also, while this is running - check out the Tensorboard profiler in `utils`.

![](img/tb-profiler.png)

In [None]:
# Training duration, truncated to whole minutes.
elapsed_seconds = end_time - start_time
runtime_mins = int(elapsed_seconds / 60)
print(f"Total runtime: {runtime_mins} minutes")

In [None]:
# Show only the history entries whose name contains 'val'
# (Keras prefixes validation metrics with 'val_').
history = layer_history.history
val_keys = [name for name in history if 'val' in name]
print([(name, history[name]) for name in val_keys])

### Now, save the model

In [None]:
# first, create the bucket to store the tensorflow models
# ! gsutil mb -l us-central1 $path

In [None]:
#save the models

# Export each tower as its own SavedModel under the configured `path`.
for tower, export_name in (
    (model.query_tower, "query_model"),
    (model.candidate_tower, "candidate_model"),
):
    tf.saved_model.save(tower, export_dir=f"{path}/{export_name}")

## Save the candidate embeddings to GCS for use in Matching Engine later
These will be the files we use for the index

This does the following:
1) Create a tf pipeline to convert embeddings to numpy
2) Serialize the candidate song embeddings with the song_uri index and save to GCS

In [None]:
# create a tf function to convert any bad null values
def tf_if_null_return_zero(val):
    """
    Replace NaNs in `val` with zeros and clamp everything else to [-1e12, 1e12].

    NaNs sometimes appear in embedding calculations; this cleans the embedding
    values before they are serialized downstream.

    Args:
        val: a tensor (or value convertible to one).

    Returns:
        A tensor of the same shape and dtype with NaNs set to 0 and the
        remaining values (including +/-inf) clipped to [-1e12, 1e12].
    """
    val = tf.convert_to_tensor(val)
    # Clipping alone does not guarantee that NaNs are replaced, so zero them
    # explicitly first. is_nan is only defined for floating dtypes.
    if val.dtype.is_floating:
        val = tf.where(tf.math.is_nan(val), tf.zeros_like(val), val)
    return tf.clip_by_value(val, -1e12, 1e12)

In [None]:
def _embed_candidate_batch(features):
    """Pair a batch's `track_uri_can` values with its cleaned candidate-tower output."""
    raw_embeddings = model.candidate_tower(features)
    return [features['track_uri_can'], tf_if_null_return_zero(raw_embeddings)]


# Candidates are embedded 10000 at a time; each element is a two-part batch
# that the export loop unpacks as (songs, embeddings).
candidate_embeddings = tt.parsed_candidate_dataset.batch(10000).map(_embed_candidate_batch)

In [None]:
# Save to the required format

# Open the output file ONCE, before iterating over the batches. Opening it in
# 'w' mode inside the batch loop truncates the file on every batch, which
# leaves only the final batch of embeddings on disk.
with open("candidate_embeddings.json", 'w') as f:
    for batch in candidate_embeddings:
        songs, embeddings = batch
        for song, emb in zip(songs.numpy(), embeddings.numpy()):
            # One JSON object per line: {"id": ..., "embedding": [...]}.
            # NOTE(review): if `song` is a bytes object, str(song) writes the
            # Python literal form (b'...') as the id - confirm that this is what
            # the index-building notebook expects before changing it.
            f.write('{"id":"' + str(song) + '","embedding":[' + ",".join(str(x) for x in list(emb)) + ']}')
            f.write("\n")

In [None]:
def upload_blob(bucket_name, source_file_name, destination_blob_name):
    """Upload a local file to a GCS bucket and print a confirmation.

    Args:
        bucket_name: ID of the GCS bucket, e.g. "your-bucket-name"
            (bare name, without the "gs://" scheme).
        source_file_name: path of the local file to upload,
            e.g. "local/path/to/file".
        destination_blob_name: ID of the GCS object to create,
            e.g. "storage-object-name".
    """
    target_blob = storage.Client().bucket(bucket_name).blob(destination_blob_name)
    target_blob.upload_from_filename(source_file_name)

    print(
        f"File {source_file_name} uploaded to {destination_blob_name}."
    )
# Derive the destination from the notebook-level `path` instead of a hard-coded
# bucket, so the upload follows the model directory configured at the top.
# With the default path ('gs://two-tower-models') this resolves to the same
# bucket and object name as before.
gcs_location = path[len("gs://"):] if path.startswith("gs://") else path
gcs_parts = gcs_location.strip("/").split("/", 1)
embeddings_bucket = gcs_parts[0]
embeddings_prefix = gcs_parts[1] + "/" if len(gcs_parts) > 1 else ""
upload_blob(
    embeddings_bucket,
    'candidate_embeddings.json',
    embeddings_prefix + 'candidates/candidate_embeddings.json',
)

### Finished

Go on to the [03 notebook](03-matching-engine.ipynb)

You should see results similar to the screenshot below
![](img/embeddings.png)