# Build baseline tfrs model 

Look inside of `./two_tower_src/` for the source code and model code

In [1]:
PROJECT_ID = 'hybrid-vertex'  # <--- TODO: CHANGE THIS
LOCATION = 'us-central1' 

In [2]:
# !pip install tensorflow-recommenders==0.6.0 --user

In [3]:
import os

os.environ['TF_GPU_THREAD_MODE']='gpu_private'
# os.environ['TF_GPU_THREAD_COUNT']='1000'
os.environ['TF_GPU_ALLOCATOR']='cuda_malloc_async'

In [4]:
import json

import tensorflow as tf
import logging


# limiting GPU growth
# gpus = tf.config.list_physical_devices('GPU')
# if gpus:
#     try:
#         for gpu in gpus:
#             tf.config.experimental.set_memory_growth(gpu, True)
#         logging.info(f'detected: {len(gpus)} GPUs')
#     except RuntimeError as e:
#         # Memory growth must be set before GPUs have been initialized
#         logging.info(e)

import tensorflow_recommenders as tfrs



from google.cloud import storage

from two_tower_src import two_tower as tt



2022-10-07 03:18:08.994863: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-10-07 03:18:09.684683: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:214] Using CUDA malloc Async allocator for GPU: 0
2022-10-07 03:18:09.684941: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 38238 MB memory:  -> device: 0, name: NVIDIA A100-SXM4-40GB, pci bus id: 0000:00:04.0, compute capability: 8.0


## Create Dataset for local training and testing

### Playlist dataset

In [5]:
batch_size = 47000
train_dir = 'spotify-beam-v3'
train_dir_prefix = 'v6/train_last_5_v2/'

valid_dir = 'spotify-beam-v3'
valid_dir_prefix = 'v6/valid_last_5/'

client = storage.Client()

options = tf.data.Options()
options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.AUTO
 

train_files = []
for blob in client.list_blobs(f'{train_dir}', prefix=f'{train_dir_prefix}', delimiter="/"):
    train_files.append(blob.public_url.replace("https://storage.googleapis.com/", "gs://"))

def full_parse(data):
    data = tf.data.TFRecordDataset(data)
    return data
    
train_dataset = tf.data.Dataset.from_tensor_slices(train_files).prefetch(
    tf.data.AUTOTUNE,
)

train_dataset = train_dataset.interleave(
    full_parse,
    cycle_length=tf.data.AUTOTUNE, 
    num_parallel_calls=tf.data.AUTOTUNE,
    deterministic=False,
).map(tt.parse_tfrecord, num_parallel_calls=tf.data.AUTOTUNE,).batch(
    batch_size 
).prefetch(
    tf.data.AUTOTUNE,
).with_options(options)


valid_files = []
for blob in client.list_blobs(f'{valid_dir}', prefix=f'{valid_dir_prefix}', delimiter="/"):
    valid_files.append(blob.public_url.replace("https://storage.googleapis.com/", "gs://"))


valid_dataset = tf.data.Dataset.from_tensor_slices(valid_files).prefetch(
    tf.data.AUTOTUNE,
)

valid_dataset = valid_dataset.interleave(
    full_parse,
    num_parallel_calls=tf.data.AUTOTUNE,
    cycle_length=tf.data.AUTOTUNE, 
    deterministic=False,
).map(tt.parse_tfrecord, num_parallel_calls=tf.data.AUTOTUNE,).batch(
    batch_size
).prefetch(
    tf.data.AUTOTUNE,
).with_options(options)

# Local Training

In [6]:
layer_sizes=[256,128]
with tf.device('/GPU:0'):
    model = tt.TheTwoTowers(layer_sizes)

    model.compile(optimizer=tf.keras.optimizers.Adagrad(0.01))

In [7]:
## Quick look at the layers
print("Playlist (query) Tower:")

for i, l in enumerate(model.query_tower.layers):
    print(i, l.name)

Playlist (query) Tower:
0 pl_name_emb_model
1 pl_collaborative_emb_model
2 pl_track_uri_emb_model
3 n_songs_pl_emb_model
4 n_artists_pl_emb_model
5 n_albums_pl_emb_model
6 artist_name_pl_emb_model
7 track_uri_pl_emb_model
8 track_name_pl_emb_model
9 duration_ms_songs_pl_emb_model
10 album_name_pl_emb_model
11 artist_pop_pl_emb_model
12 artists_followers_pl_emb_model
13 track_pop_pl_emb_model
14 artist_genres_pl_emb_model
15 pl_cross_layer
16 pl_dense_layers


In [8]:
print("Track (candidate) Tower:")
for i, l in enumerate(model.candidate_tower.layers):
    print(i, l.name)

Track (candidate) Tower:
0 artist_name_can_emb_model
1 track_name_can_emb_model
2 album_name_can_emb_model
3 artist_uri_can_emb_model
4 track_uri_can_emb_model
5 album_uri_can_emb_model
6 duration_ms_can_normalized
7 track_pop_can_normalized
8 artist_pop_can_normalized
9 artist_followers_can_normalized
10 artist_genres_can_emb_model
11 can_cross_layer
12 candidate_dense_layers


### Local training for one Epoch

In [9]:
import time

In [None]:
NUM_EPOCHS = 10
start_time = time.time()
with tf.device('/GPU:0'):
    layer_history = model.fit(
        train_dataset,
        validation_data=valid_dataset,
        validation_freq=5,
        epochs=NUM_EPOCHS,
        # steps_per_epoch=2,
        # callbacks=tensorboard_cb,
        # verbose=0
    )
end_time = time.time()

print(f"Training for {NUM_EPOCHS} epoch")
accuracy = layer_history.history['val_factorized_top_k/top_1_categorical_accuracy'][-1]
print(f"Top 100 categorical accuracy: {accuracy}")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10

IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)



Epoch 10/10

IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)





In [19]:
runtime_mins = int((end_time - start_time) / 60)
print(f"Total runtime: {runtime_mins}")

Total runtime: 462


In [32]:
val_keys = [v for v in layer_history.history.keys() if 'val' in v]
print([(key, layer_history.history[key]) for key in val_keys])
                      #'val_factorized_top_k/top_1_categorical_accuracy']]

[('val_factorized_top_k/top_1_categorical_accuracy', [0.3375703990459442, 0.36303529143333435]), ('val_factorized_top_k/top_5_categorical_accuracy', [0.7338976860046387, 0.6722028255462646]), ('val_factorized_top_k/top_10_categorical_accuracy', [0.7596203088760376, 0.7091684937477112]), ('val_factorized_top_k/top_50_categorical_accuracy', [0.8029807209968567, 0.7852988839149475]), ('val_factorized_top_k/top_100_categorical_accuracy', [0.8168746829032898, 0.8088387846946716]), ('val_loss', [27012130.0, 315272576.0]), ('val_regularization_loss', [0, 0]), ('val_total_loss', [27012130.0, 315272576.0])]
