In [1]:
import os
import logging
# Set TensorFlow's minimum native log level to "2" via the environment.
# This is done before the `import tensorflow` further down so the setting is
# already in place when the library is loaded.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
# Restrict the Python-side "tensorflow" logger to ERROR and above.
logging.getLogger("tensorflow").setLevel(logging.ERROR)

import tensorflow as tf
import tensorflow_ranking as tfr
import pandas as pd
import numpy as np
from tensorflow_serving.apis import input_pb2

In [2]:
# Print the device details TensorFlow reports for each visible GPU.
gpus = tf.config.list_physical_devices("GPU")
for gpu in gpus:
    print(tf.config.experimental.get_device_details(gpu))

{'compute_capability': (7, 5), 'device_name': 'Tesla T4'}


In [3]:
# Mount Google Drive (forcing a fresh mount) so the TFRecord files are reachable.
from google.colab import drive

drive.mount("/content/drive", force_remount=True)

# Training and validation TFRecord files stored on the mounted drive.
pathtrain = (
    "/content/drive/MyDrive/colab/TFM/datasets/yahoo/tfrecords/train.tfrecords"
)
patheval = (
    "/content/drive/MyDrive/colab/TFM/datasets/yahoo/tfrecords/val.tfrecords"
)

Mounted at /content/drive


## Create a model builder

### Specify Features

In [4]:
# Feature names are the decimal strings "1" through "100".
names = list(map(str, range(1, 101)))


In [5]:
# No query-level (context) features are declared.
context_feature_spec = {}

# Each per-example feature is one float32 value that defaults to 0.0.
example_feature_spec = {
    feature_name: tf.io.FixedLenFeature(
        shape=(1,), dtype=tf.float32, default_value=0.0
    )
    for feature_name in names
}

# (label name, parsing spec) pair: one int64 value defaulting to -1.
# NOTE(review): -1 presumably marks missing/padded labels - confirm.
label_spec = (
    "relevance_label",
    tf.io.FixedLenFeature(shape=(1,), dtype=tf.int64, default_value=-1),
)

### Define input_creator

In [6]:
# Derive the model inputs from the context and example feature specs.
input_creator = tfr.keras.model.FeatureSpecInputCreator(
    context_feature_spec=context_feature_spec,
    example_feature_spec=example_feature_spec,
)

### Define preprocessor

In [7]:
def log1p(tensor):
    """Apply a sign-preserving log1p: sign(x) * log1p(x * sign(x)).

    Args:
        tensor: numeric tensor accepted by `tf.sign` and `tf.math.log1p`.

    Returns:
        The transformed tensor, carrying the sign of the input.
    """
    sign = tf.sign(tensor)
    # Multiplying by the sign yields the magnitude, so log1p never sees a
    # negative input.
    magnitude = tensor * sign
    return tf.math.log1p(magnitude) * sign

# Every example feature goes through the same signed log1p transform.
preprocess_spec = {feature_name: log1p for feature_name in example_feature_spec}

preprocessor = tfr.keras.model.PreprocessorWithSpec(preprocess_spec)

### Define scorer

In [8]:
# GAM scorer configuration.
scorer = tfr.keras.model.GAMScorer(
    # Derived from the feature spec so it cannot drift from the declared
    # features (it evaluates to 100 with the spec defined above).
    example_feature_num=len(example_feature_spec),
    # Hidden layer widths are passed as integers, matching the documented
    # List[int] type; the previous string values ("16", "8") only worked
    # where the underlying layer happened to coerce them.
    example_hidden_layer_dims=[16, 8],
    activation=tf.nn.relu,
    use_batch_norm=True,
    batch_norm_moment=0.99,
    dropout=0.4,
)

### Make model_builder

In [9]:
# Combine the input creator, preprocessor and scorer into one model builder.
# The mask feature name is the same string given to the dataset builder.
model_builder = tfr.keras.model.ModelBuilder(
    input_creator=input_creator,
    preprocessor=preprocessor,
    scorer=scorer,
    mask_feature_name="example_list_mask",
    name="model_builder",
)

## Create a dataset builder

In [10]:
# Dataset hyperparameters: input files, batch sizes and list size.
dataset_hparams = tfr.keras.pipeline.DatasetHparams(
    train_input_pattern=pathtrain,
    valid_input_pattern=patheval,
    train_batch_size=32,
    valid_batch_size=10,
    list_size=50,
    # Name the TFRecord reader explicitly instead of reading the dataclass
    # default back off the class (`DatasetHparams.dataset_reader`), which
    # hid which reader was actually in use.
    dataset_reader=tf.data.TFRecordDataset,
)

### Make dataset_builder

In [11]:
# Build datasets from the feature/label specs and the dataset hyperparameters.
# No per-example sample weights are used.
dataset_builder = tfr.keras.pipeline.SimpleDatasetBuilder(
    context_feature_spec=context_feature_spec,
    example_feature_spec=example_feature_spec,
    mask_feature_name="example_list_mask",
    label_spec=label_spec,
    hparams=dataset_hparams,
    sample_weight_spec=None,
)

## Create a ranking pipeline

In [12]:
# Training configuration: 5 epochs of 1000 steps each, 100 validation steps,
# Adagrad with learning rate 0.05, and the "approx_ndcg_loss" loss key.
# Model artifacts are written under `model_dir` on the mounted drive.
pipeline_hparams = tfr.keras.pipeline.PipelineHparams(
    model_dir="/content/drive/MyDrive/colab/TFM/yahoo_ranking_model_dir",
    num_epochs=5,
    steps_per_epoch=1000,
    validation_steps=100,
    learning_rate=0.05,
    loss="approx_ndcg_loss",
    optimizer="adagrad",
)

### Define ranking_pipeline

In [13]:
# Tie the model builder, dataset builder and hyperparameters into one pipeline.
ranking_pipeline = tfr.keras.pipeline.SimplePipeline(
    model_builder=model_builder,
    dataset_builder=dataset_builder,
    hparams=pipeline_hparams,
)

## Train and evaluate the model

In [14]:
# Run the pipeline's combined train/validate entry point with verbose=1.
ranking_pipeline.train_and_validate(verbose=1)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
