In [1]:
import gc

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_recommenders as tfrs
from sentence_transformers import SentenceTransformer
import torch
import time

SEED = 11925939
tf.random.set_seed(SEED)

!nvidia-smi
print("TensorFlow version:", tf.__version__)
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
print("PyTorch version:", torch.__version__)

import os
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

2024-12-11 16:17:26.086384: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-12-11 16:17:26.086426: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-12-11 16:17:26.088088: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-12-11 16:17:26.096939: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  from tqdm.autonotebook import tqdm, trange


Wed Dec 11 16:17:46 2024       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 560.35.03              Driver Version: 560.35.03      CUDA Version: 12.6     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla V100-PCIE-32GB           On  |   00000000:B2:00.0 Off |                    0 |
| N/A   33C    P0             25W /  250W |       1MiB /  32768MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
|   1  Tesla V100-PCIE-32GB           On  |   00

In [2]:
#%pip uninstall --y tf_keras tensorflow tensorflow-recommenders
#%pip install tensorflow==2.15.0
#%pip install tensorflow_recommenders
#%pip install tf_keras

In [3]:
class QueryModel(tf.keras.Model):
    """Model for encoding user queries."""

    def __init__(self, layers):
        """Model for encoding user queries.

        Args:
          layers:
            A list of Layer configuration objects where the i-th entry represents the number of units
            and the activation function the i-th layer contains.
        """
        super().__init__()
        # Then construct the layers.
        self.dense_layers = tf.keras.Sequential()

        # Append deep Layers
        for layer in layers:
            self.dense_layers.add(tf.keras.layers.Dense(layer["size"], activation=layer["act_fn"]))

        # Normalize The Output
        self.dense_layers.add(tf.keras.layers.Lambda(lambda x: tf.keras.backend.l2_normalize(x, axis=-1)))

    def call(self, inputs):
        return self.dense_layers(inputs)


class ProductModel(tf.keras.Model):
    """Model for encoding products."""

    def __init__(self, layers):
        """Model for encoding products.

        Args:
          layers:
            A list of Layer configuration objects where the i-th entry represents the number of units
            and the activation function the i-th layer contains.
        """
        super().__init__()

        # Then construct the layers.
        self.dense_layers = tf.keras.Sequential()

        # Append deep Layers
        for layer in layers:
            self.dense_layers.add(tf.keras.layers.Dense(layer["size"], activation=layer["act_fn"]))

        self.dense_layers.add(tf.keras.layers.Lambda(lambda x: tf.keras.backend.l2_normalize(x, axis=-1)))

    def call(self, inputs):
        return self.dense_layers(inputs)


class TwoTowerModel(tfrs.models.Model):

    def __init__(self, model_config, candidates):
        super().__init__()
        self.query_model = QueryModel(model_config)
        self.product_model = ProductModel(model_config)
        self.task = tfrs.tasks.Retrieval(
            metrics=tfrs.metrics.FactorizedTopK(
                candidates=candidates.batch(128).map(self.query_model)
            ),
        )

    def compute_loss(self, features, training=False):
        query_embeddings = self.query_model(features["query_embedding"])
        product_embeddings = self.product_model(features["product_embedding"])

        return self.task(
            product_embeddings, query_embeddings, compute_metrics=not training)


def loading_data(pairs: pd.DataFrame) -> (tf.data.Dataset, tf.data.Dataset):
    pairs['product_embedding'] = pairs['product_embedding'].apply(np.array)
    pairs['query_embedding'] = pairs['query_embedding'].apply(np.array)
    unique_embeddings = pairs.groupby('id')['product_embedding'].first().values
    
    print('Compete Dataset Shape:', pairs.shape, 'unique candidates: {}', len(unique_embeddings))
    dataset = tf.data.Dataset.from_tensor_slices({
        "query_embedding": np.stack(pairs['query_embedding'].values),
        "product_embedding": np.stack(pairs['product_embedding'].values)
    })
    shuffled = dataset.shuffle(len(pairs), seed=SEED, reshuffle_each_iteration=False)
    candidates_dataset = tf.data.Dataset.from_tensor_slices(np.stack(unique_embeddings))
    del pairs
    return shuffled, candidates_dataset


def find_best_config(num_epochs: int, configs, cached_train, cached_test, candidates_dataset):
    best_accuracy = {'config': None, 'accuracy': -1}

    for i in range(len(configs)):
        torch.cuda.empty_cache()
        model = TwoTowerModel(configs[i], candidates_dataset)
        model.compile(optimizer=tf.keras.optimizers.Adagrad(0.1))

        history = model.fit(
            cached_train,
            validation_data=cached_test,
            epochs=num_epochs,
            verbose=0)

        accuracy = history.history["val_factorized_top_k/top_100_categorical_accuracy"][-1]
        if accuracy > best_accuracy['accuracy']:
            best_accuracy = {'config': configs[i], 'accuracy': accuracy}

    return best_accuracy


In [4]:
pairs = pd.read_parquet('../synth_set/synthetic_positive_pairs.parquet.gzip').sample(n=100_000,random_state=SEED)
pairs = pd.merge(pairs, pd.read_pickle('../p_collection.pkl'), on='id')
pairs.dropna(subset=['query', 'product_embedding'], inplace=True)

l:int = len(pairs)

shuffled, candidates_dataset = loading_data(pairs)
train = shuffled.take(int(l*0.8))
test = shuffled.skip(int(l*0.8)).take(int(l*0.2))

batch_size = 2048
cached_train = train.batch(batch_size).prefetch(tf.data.AUTOTUNE)
cached_test = test.batch(batch_size).cache().prefetch(tf.data.AUTOTUNE)

configs = []
k = 0
for i in range(3):
    config = []
    config.append({'size': 128, 'act_fn': None})
    config.append({'size': 128, 'act_fn': 'elu'})
    for j in range(k):
        size = int(config[len(config) - 1]['size'])
        config.append({'size': size * 2, 'act_fn': 'elu'})
    k += 1
    configs.append(config[::-1])

print("Configs that are brute forced", configs)
start = time.time()
config = find_best_config(5, configs, cached_train, cached_test, candidates_dataset)
print('solution', config)
print('time passed', (time.time() - start), "seconds")

Compete Dataset Shape: (100000, 9) unique candidates: {} 90799


2024-12-11 16:20:21.946790: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1929] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 31134 MB memory:  -> device: 0, name: Tesla V100-PCIE-32GB, pci bus id: 0000:b2:00.0, compute capability: 7.0
2024-12-11 16:20:21.947568: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1929] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 31134 MB memory:  -> device: 1, name: Tesla V100-PCIE-32GB, pci bus id: 0000:b5:00.0, compute capability: 7.0
2024-12-11 16:20:24.043777: I external/local_tsl/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory


Configs that are brute forced [[{'size': 128, 'act_fn': 'elu'}, {'size': 128, 'act_fn': None}], [{'size': 256, 'act_fn': 'elu'}, {'size': 128, 'act_fn': 'elu'}, {'size': 128, 'act_fn': None}], [{'size': 512, 'act_fn': 'elu'}, {'size': 256, 'act_fn': 'elu'}, {'size': 128, 'act_fn': 'elu'}, {'size': 128, 'act_fn': None}]]


2024-12-11 16:20:29.136091: I external/local_xla/xla/service/service.cc:168] XLA service 0x1517218c5830 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2024-12-11 16:20:29.136133: I external/local_xla/xla/service/service.cc:176]   StreamExecutor device (0): Tesla V100-PCIE-32GB, Compute Capability 7.0
2024-12-11 16:20:29.136183: I external/local_xla/xla/service/service.cc:176]   StreamExecutor device (1): Tesla V100-PCIE-32GB, Compute Capability 7.0
2024-12-11 16:20:29.141797: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-12-11 16:20:29.188824: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8907
I0000 00:00:1733930429.226959 1722958 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


solution {'config': [{'size': 128, 'act_fn': 'elu'}, {'size': 128, 'act_fn': None}], 'accuracy': 0.20145000517368317}
time passed 418.06436586380005 seconds
