In [None]:
!pip install -q tensorflow-recommenders
!pip install -q scann

In [1]:
import numpy as np 
import pandas as pd 
import os
import zipfile
import json
import tensorflow as tf
import tensorflow_recommenders as tfrs
from tqdm import tqdm
from typing import Dict, Text

2022-12-27 09:01:21.996938: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-12-27 09:01:23.202189: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda/lib64:/usr/local/cuda/lib:/usr/local/lib/x86_64-linux-gnu:/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/local/cuda/lib64:/usr/local/cuda/lib:/usr/local/lib/x86_64-linux-gnu:/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/local/nvidia/lib:/usr/local/nvidia/lib64
2022-12-27 09:01:23.202435: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Co

In [2]:
print('TensorFlow version: {}'.format(tf.__version__))
print('TensorFlow Recommender version: {}'.format(tfrs.__version__))
print('TensorFlow Raking')
print('TensorFlow ScaNN')

TensorFlow version: 2.11.0
TensorFlow Recommender version: v0.7.2
TensorFlow Raking
TensorFlow ScaNN


In [3]:
INPUT_DIR = '/kaggle/input/otto-train-tfrecord-file/kaggle/working'
OUTPUT_DIR = '/kaggle/working/'
TRAIN_FILE = f'{INPUT_DIR}/train.jsonl'
TEST_FILE = f'{INPUT_DIR}/test.jsonl'

MODEL_DIR = f'{OUTPUT_DIR}/serving_model'
MODEL_DIR_SCAN = f'{MODEL_DIR}/ScaNN_Model'
MODEL_DIR_NORM = f'{MODEL_DIR}/Norm_Model'

!mkdir -p {MODEL_DIR}
!mkdir -p {MODEL_DIR_SCAN}
!mkdir -p {MODEL_DIR_NORM}

In [4]:
gpus = tf.config.list_physical_devices("GPU")
if gpus:
    # Create 2 virtual GPUs with 1GB memory each
    try:
        tf.config.set_logical_device_configuration(
            gpus[0],
            [tf.config.LogicalDeviceConfiguration(memory_limit=1024),
             tf.config.LogicalDeviceConfiguration(memory_limit=1024)])
        logical_gpus = tf.config.list_logical_devices("GPU")
        print(len(gpus), "Physical GPU,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Virtual devices must be set before GPUs have been initialized
        print('error')
        print(e)

strategy = tf.distribute.MirroredStrategy()
print('Number of devices: {}'.format(strategy.num_replicas_in_sync))

1 Physical GPU, 2 Logical GPUs
Number of devices: 2


In [None]:
# tfrecord_files = []
# for dirname, _, filenames in os.walk(INPUT_DIR):
#     for filename in filenames:
#         tfrecord_files.append(os.path.join(dirname, filename))
#         break
# print(tfrecord_files)

In [5]:
# Create a description of the features.
feature_description = {
    'session': tf.io.FixedLenFeature([], tf.int64),
    'aid': tf.io.FixedLenFeature([], tf.int64),
    'ts': tf.io.FixedLenFeature([], tf.int64),
    'typ': tf.io.FixedLenFeature([], tf.int64),
}

In [6]:
window_size = 500

def item2item(ds):
    item1 = ds['aid'][0]
    item2, idx, rating = tf.unique_with_counts(ds['aid'])
    item1 = tf.repeat(item1, repeats=[len(item2)],axis=0)
    return {"item_A" : tf.strings.as_string(item1), "item_B" : tf.strings.as_string(item2), "rating": tf.cast(rating,dtype=tf.float32)}

def unique_item(ds):
    items, idx, count = tf.unique_with_counts(ds['item_B'])
    return {"items" :  items}

In [7]:
with strategy.scope():
    dataset = tf.data.Dataset.list_files(f'{INPUT_DIR}/train_chunk_0*')
    dataset = dataset.interleave(lambda x: tf.data.TFRecordDataset(x, compression_type='ZLIB'), num_parallel_calls=tf.data.AUTOTUNE)
    dataset = dataset.map(lambda x: tf.io.parse_single_example(x, feature_description))
    dataset = dataset.take(1000)
    dataset = dataset.group_by_window(
                        key_func=lambda x: x['session'],
                        reduce_func=lambda key, dataset: dataset.batch(window_size),
                        window_size=window_size)
    dataset = dataset.map(item2item).flat_map(tf.data.Dataset.from_tensor_slices)
    print(dataset.element_spec)
    dataset = dataset.batch(batch_size=64, num_parallel_calls=tf.data.AUTOTUNE)
    print(dataset.element_spec)
    dataset = dataset.prefetch(buffer_size=tf.data.AUTOTUNE)
    print(dataset.element_spec)

    items = dataset.map(unique_item).flat_map((tf.data.Dataset.from_tensor_slices))
    items = items.map(lambda x: x["items"])
    items = items.batch(64)
    print(items.element_spec)

{'item_A': TensorSpec(shape=(), dtype=tf.string, name=None), 'item_B': TensorSpec(shape=(), dtype=tf.string, name=None), 'rating': TensorSpec(shape=(), dtype=tf.float32, name=None)}
{'item_A': TensorSpec(shape=(None,), dtype=tf.string, name=None), 'item_B': TensorSpec(shape=(None,), dtype=tf.string, name=None), 'rating': TensorSpec(shape=(None,), dtype=tf.float32, name=None)}
{'item_A': TensorSpec(shape=(None,), dtype=tf.string, name=None), 'item_B': TensorSpec(shape=(None,), dtype=tf.string, name=None), 'rating': TensorSpec(shape=(None,), dtype=tf.float32, name=None)}
TensorSpec(shape=(None,), dtype=tf.string, name=None)


In [None]:
# for element in dataset.take(2):
#     print(element)

In [None]:
# for element in items.take(2):
#     print(element)

In [8]:
%%time
itemlist_unique = np.unique(np.concatenate(list(items)))
# print(len(itemlist_unique))

CPU times: user 161 ms, sys: 26 ms, total: 187 ms
Wall time: 154 ms


In [9]:
print(len(itemlist_unique))

615


In [10]:
train_dataset = dataset.shuffle(1000, seed=42, reshuffle_each_iteration=False)

#train_dataset = shuffled_dataset.take(2000)

# A multitask Model

There are two critical parts to multi-task recommenders:

* They optimize for two or more objectives, and so have two or more losses.
* They share variables between the tasks, allowing for transfer learning.
In this tutorial, we will define our models as before, but instead of having a single task, we will have two tasks: one that predicts ratings, and one that predicts movie watches.

The user and movie models are as before:

In [11]:
with strategy.scope():
    class item2itemModel(tfrs.models.Model):
        def __init__(self) -> None:
            super().__init__()
            embedding_dimension = 32
            self.item_A_model = tf.keras.Sequential([
                tf.keras.layers.StringLookup(
                    vocabulary=itemlist_unique, mask_token=None),
                tf.keras.layers.Embedding(len(itemlist_unique) + 1, embedding_dimension)
                ])

            self.item_B_model = tf.keras.Sequential([
                tf.keras.layers.StringLookup(
                    vocabulary=itemlist_unique, mask_token=None),
                tf.keras.layers.Embedding(len(itemlist_unique) + 1, embedding_dimension)
                ])

            self.rating_model = tf.keras.Sequential([
                tf.keras.layers.Dense(256, activation="relu"),
                tf.keras.layers.Dense(128, activation="relu"),
                tf.keras.layers.Dense(1),
                ])

            self.retrieval_task = tfrs.tasks.Retrieval(
                loss=tf.keras.losses.CategoricalCrossentropy(),
                metrics=tfrs.metrics.FactorizedTopK(
                    candidates=items.map(self.item_B_model),
                    )
                )

            self.rating_task = tfrs.tasks.Ranking(
                loss=tf.keras.losses.MeanSquaredError(reduction=tf.keras.losses.Reduction.NONE),
                metrics=[tf.keras.metrics.RootMeanSquaredError()],
                )

        def call(self, features: Dict[Text, tf.Tensor]) -> tf.Tensor:
            item_A_embeddings = self.item_A_model(features["item_A"])
            item_B_embeddings = self.item_B_model(features["item_B"])
            predicted_ratings = self.rating_model((tf.concat([item_A_embeddings, item_B_embeddings], axis=1)))
            
            return (item_A_embeddings, item_B_embeddings, predicted_ratings)

        def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
            label_ratings = features.pop("rating")
            item_A_embeddings, item_B_embeddings, predicted_ratings = self(features)
            print('item_A_embeddings: ', item_A_embeddings.shape)
            print('item_B_embeddings: ', item_B_embeddings.shape)
            print('predicted_ratings: ', predicted_ratings.shape)
            
            if training:
                rating_loss = self.rating_task(labels=label_ratings, predictions=predicted_ratings,compute_metrics=False)
                retrieval_loss = self.retrieval_task(item_A_embeddings, item_B_embeddings,compute_metrics=False)
            else:
                rating_loss = self.rating_task(labels=label_ratings, predictions=predicted_ratings,compute_metrics=True)
                retrieval_loss = self.retrieval_task(item_A_embeddings, item_B_embeddings,compute_metrics=True)
            print('retrieval_loss: ', retrieval_loss)

            return (retrieval_loss + rating_loss)       

In [12]:
with strategy.scope():
    model = item2itemModel()
    model.compile(optimizer=tf.keras.optimizers.Adagrad(0.1))

In [13]:
with strategy.scope():
    cached_train = train_dataset.shuffle(1000).cache()
    cached_test = train_dataset.shuffle(1000).batch(128).cache()

    print(cached_train.element_spec)
    print(cached_test.element_spec)
    print(dataset.element_spec)

{'item_A': TensorSpec(shape=(None,), dtype=tf.string, name=None), 'item_B': TensorSpec(shape=(None,), dtype=tf.string, name=None), 'rating': TensorSpec(shape=(None,), dtype=tf.float32, name=None)}
{'item_A': TensorSpec(shape=(None, None), dtype=tf.string, name=None), 'item_B': TensorSpec(shape=(None, None), dtype=tf.string, name=None), 'rating': TensorSpec(shape=(None, None), dtype=tf.float32, name=None)}
{'item_A': TensorSpec(shape=(None,), dtype=tf.string, name=None), 'item_B': TensorSpec(shape=(None,), dtype=tf.string, name=None), 'rating': TensorSpec(shape=(None,), dtype=tf.float32, name=None)}


In [14]:
    model.fit(cached_train, epochs=3)

2022-12-27 09:02:17.128888: W tensorflow/core/grappler/optimizers/data/auto_shard.cc:784] AUTO sharding policy will apply DATA sharding policy as it failed to apply FILE sharding policy because of the following reason: Did not find a shardable source, walked to a node which is not a dataset: name: "GroupByWindowDataset/_16"
op: "GroupByWindowDataset"
input: "TakeDataset/_15"
attr {
  key: "Tkey_func_other_arguments"
  value {
    list {
    }
  }
}
attr {
  key: "Treduce_func_other_arguments"
  value {
    list {
    }
  }
}
attr {
  key: "Twindow_size_func_other_arguments"
  value {
    list {
    }
  }
}
attr {
  key: "_cardinality"
  value {
    i: -2
  }
}
attr {
  key: "key_func"
  value {
    func {
      name: "__inference_Dataset_group_by_window_key_func_wrapper_82"
    }
  }
}
attr {
  key: "metadata"
  value {
    s: "\n\026GroupByWindowDataset:5"
  }
}
attr {
  key: "output_shapes"
  value {
    list {
      shape {
        dim {
          size: -1
        }
      }
      sh

Epoch 1/3
item_A_embeddings:  (None, 32)
item_B_embeddings:  (None, 32)
predicted_ratings:  (None, 1)
retrieval_loss:  Tensor("retrieval/Identity:0", shape=(None,), dtype=float32, device=/job:localhost/replica:0/task:0/device:GPU:0)
item_A_embeddings:  (None, 32)
item_B_embeddings:  (None, 32)
predicted_ratings:  (None, 1)
retrieval_loss:  Tensor("replica_1/retrieval/Identity:0", shape=(None,), dtype=float32, device=/job:localhost/replica:0/task:0/device:GPU:1)
item_A_embeddings:  (None, 32)
item_B_embeddings:  (None, 32)
predicted_ratings:  (None, 1)
retrieval_loss:  Tensor("retrieval/Identity:0", shape=(None,), dtype=float32, device=/job:localhost/replica:0/task:0/device:GPU:0)
item_A_embeddings:  (None, 32)
item_B_embeddings:  (None, 32)
predicted_ratings:  (None, 1)
retrieval_loss:  Tensor("replica_1/retrieval/Identity:0", shape=(None,), dtype=float32, device=/job:localhost/replica:0/task:0/device:GPU:1)



You may not need to update to CUDA 11.1; cherry-picking the ptxas binary is often sufficient.

You may not need to update to CUDA 11.1; cherry-picking the ptxas binary is often sufficient.

You may not need to update to CUDA 11.1; cherry-picking the ptxas binary is often sufficient.

You may not need to update to CUDA 11.1; cherry-picking the ptxas binary is often sufficient.


      1/Unknown - 7s 7s/step - factorized_top_k/top_1_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_5_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_10_categorical_accuracy: 0.0156 - factorized_top_k/top_50_categorical_accuracy: 0.0625 - factorized_top_k/top_100_categorical_accuracy: 0.1406 - root_mean_squared_error: 2.5398 - loss: 28.0865 - regularization_loss: 0.0000e+00 - total_loss: 28.0865


You may not need to update to CUDA 11.1; cherry-picking the ptxas binary is often sufficient.

You may not need to update to CUDA 11.1; cherry-picking the ptxas binary is often sufficient.


      3/Unknown - 8s 398ms/step - factorized_top_k/top_1_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_5_categorical_accuracy: 0.0052 - factorized_top_k/top_10_categorical_accuracy: 0.0156 - factorized_top_k/top_50_categorical_accuracy: 0.0521 - factorized_top_k/top_100_categorical_accuracy: 0.1458 - root_mean_squared_error: 1.8627 - loss: 21.8183 - regularization_loss: 0.0000e+00 - total_loss: 21.8183


You may not need to update to CUDA 11.1; cherry-picking the ptxas binary is often sufficient.


      4/Unknown - 9s 426ms/step - factorized_top_k/top_1_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_5_categorical_accuracy: 0.0039 - factorized_top_k/top_10_categorical_accuracy: 0.0117 - factorized_top_k/top_50_categorical_accuracy: 0.0391 - factorized_top_k/top_100_categorical_accuracy: 0.1172 - root_mean_squared_error: 1.6465 - loss: 20.6285 - regularization_loss: 0.0000e+00 - total_loss: 20.6285


You may not need to update to CUDA 11.1; cherry-picking the ptxas binary is often sufficient.


      6/Unknown - 9s 420ms/step - factorized_top_k/top_1_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_5_categorical_accuracy: 0.0028 - factorized_top_k/top_10_categorical_accuracy: 0.0083 - factorized_top_k/top_50_categorical_accuracy: 0.0305 - factorized_top_k/top_100_categorical_accuracy: 0.1025 - root_mean_squared_error: 1.5711 - loss: 19.0526 - regularization_loss: 0.0000e+00 - total_loss: 19.0526


You may not need to update to CUDA 11.1; cherry-picking the ptxas binary is often sufficient.


     10/Unknown - 11s 391ms/step - factorized_top_k/top_1_categorical_accuracy: 0.0000e+00 - factorized_top_k/top_5_categorical_accuracy: 0.0016 - factorized_top_k/top_10_categorical_accuracy: 0.0049 - factorized_top_k/top_50_categorical_accuracy: 0.0178 - factorized_top_k/top_100_categorical_accuracy: 0.0843 - root_mean_squared_error: 2.3999 - loss: 26.2955 - regularization_loss: 0.0000e+00 - total_loss: 26.2955


You may not need to update to CUDA 11.1; cherry-picking the ptxas binary is often sufficient.


Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7f78e3c5bd90>

In [None]:
    metrics = model.evaluate(cached_test, return_dict=True)

    print(f"Retrieval top-100 accuracy: {metrics['factorized_top_k/top_100_categorical_accuracy']:.3f}.")
    print(f"Ranking RMSE: {metrics['root_mean_squared_error']:.3f}.")

In [None]:
    scann_index = tfrs.layers.factorized_top_k.ScaNN(model.item_A_model, k=50)
    scann_index.index_from_dataset(
      tf.data.Dataset.zip((items, items.map(model.item_B_model)))
    )

In [None]:
# Save the index.
tf.saved_model.save(
      scann_index,
      MODEL_DIR_SCAN,
      options=tf.saved_model.SaveOptions(namespace_whitelist=["Scann"])
  )

In [None]:
model.retrieval_task = tfrs.tasks.Retrieval()  # Removes the metrics.
model.compile()
model.save(MODEL_DIR_NORM)

In [None]:
model = tf.keras.models.load_model(MODEL_DIR_NORM)

In [None]:
!zip -r trainer.zip '/kaggle/working/serving_model'

In [None]:
# Get recommendations.
_, rec_item = scann_index(tf.constant(["421211"]))
print(f"Recommendations for user 42: {rec_item[0, :50]}")

In [None]:
test_ratings = {}

for item in rec_item[0].numpy():
    A, B, test_ratings[item] = model({
      "item_A": np.array(["421211"]),
      "item_B": np.array([item])
  })

print("Ratings:")
for key, value in test_ratings.items():
    print('key:', key, "rating:", value )
