# PyTorch-TFServing Tutorial

TensorFlow Serving is used to provide an API that returns, in real time, the priorities of a particular set of offers for a particular customer. The priorities come from a PyTorch model, which consumes a customer ID and returns a vector of priorities for all available offers. This model has to be converted to the TensorFlow SavedModel format for TF Serving to be able to use it.

## Architecture

### The served model

Inputs:
* Customer ID

Outputs:
* Vector of click probabilities
* Vector of corresponding offer names

### The PyTorch model under the hood

Inputs:
* 0-based customer index

Outputs:
* Vector of all click probabilities for all offers of the customer

## Init

In [None]:
# Sizes of the synthetic offer catalogue and customer base.
NUM_OFFERS = 500
NUM_CLIENTS = 1_000_000

# Offer names are "n0", "n1", ...; customer IDs are the multiples of 11
# (0, 11, 22, ...), so a customer ID is not the same as its 0-based position.
OFFERS = [f"n{offer_idx}" for offer_idx in range(NUM_OFFERS)]
CLIENTS = list(range(0, 11 * NUM_CLIENTS, 11))

# Number of out-of-vocabulary buckets reserved for each vocabulary.
OFFERS_OOV_BUCKETS = 0
CLIENTS_OOV_BUCKETS = 1

# Root directory under which exported models are written.
SAVED_MODELS_ROOT = '../models'

In [None]:
import time
import numpy as np
import torch
import torch.nn as nn
import tensorflow as tf

## Build model

In [None]:
class InnerModel(nn.Module):
    """Lookup model that returns one row of a fixed matrix per requested index.

    The matrix is held as a buffer: it belongs to the module's state but is
    not a trainable parameter.
    """

    def __init__(self, client_offer_matrix):
        """Store ``client_offer_matrix`` as the buffer of the same name."""
        super().__init__()
        self.register_buffer(name='client_offer_matrix', tensor=client_offer_matrix)

    def forward(self, client_indices):
        """Select the matrix rows addressed by ``client_indices``.

        Args:
            client_indices: index tensor, shape (batch,).

        Returns:
            The selected rows, shape (batch, num_offers).
        """
        matrix = self.client_offer_matrix
        selected_rows = matrix[client_indices]
        return selected_rows

In [None]:
def get_test_matrix(nrows, ncols):
    """Build an ``(nrows, ncols)`` float32 tensor with entry ``[r, c] = 1000 * r + c``.

    Each entry encodes its own row and column, which makes it easy to see
    which row a lookup returned.

    NOTE(review): values are computed in float32, which represents integers
    exactly only up to 2**24, so entries in high-numbered rows may be rounded.
    """
    row_column = tf.expand_dims(tf.range(nrows, dtype=tf.float32), axis=1)  # (nrows, 1)
    col_row = tf.expand_dims(tf.range(ncols, dtype=tf.float32), axis=0)  # (1, ncols)
    # Broadcasting (nrows, 1) against (1, ncols) fills the whole grid.
    return 1000.0 * row_column + col_row

In [None]:
# One row per known customer plus the customer OOV buckets; one column per
# offer plus the offer OOV buckets.
test_matrix = get_test_matrix(len(CLIENTS) + CLIENTS_OOV_BUCKETS, len(OFFERS) + OFFERS_OOV_BUCKETS)

In [None]:
# Smoke-test the PyTorch model on a handful of row indices.
TEST_CLIENT_INDICES = [0, 3, 2, 0]
client_indices = torch.tensor(TEST_CLIENT_INDICES)  # shape (batch,)

# Hand the TensorFlow test matrix over to PyTorch via NumPy.
model = InnerModel(torch.from_numpy(test_matrix.numpy()).float())
output = model(client_indices)

print(output)
print(f"output.shape: {output.shape}")

In [None]:
# The model must return one row per requested index and one column per offer slot.
assert output.shape == (client_indices.shape[0], len(OFFERS)+OFFERS_OOV_BUCKETS)

## Convert InnerModel to TensorFlow

In [None]:
# Show the Python interpreter version the notebook is running on.
import sys
sys.version

In [None]:
import nobuco
from nobuco import ChannelOrder, ChannelOrderingStrategy, convert
from nobuco.layers.weight import WeightLayer

In [None]:
# Convert the PyTorch model to a Keras model, using client_indices as the
# example input.
# NOTE(review): the (None,) entry in input_shapes is meant to leave the batch
# dimension dynamic — confirm against the installed nobuco version.
keras_model = nobuco.pytorch_to_keras(
    model,
    args=[client_indices],
    input_shapes={client_indices: (None,)}
)

In [None]:
# Print an overview of the converted Keras model.
keras_model.summary()

In [None]:
# Run the converted model on the same indices that were fed to the PyTorch model.
keras_output = keras_model(np.array(TEST_CLIENT_INDICES))

In [None]:
# The converted model must reproduce the PyTorch output (within np.allclose tolerance).
assert np.allclose(keras_output.numpy(), output)

In [None]:
# keras_model takes row indices, not customer IDs: this selects rows 11 and 22.
keras_model(tf.constant([11, 22]))

## Add boilerplate

* Accept Customer IDs as input: add a lookup table that converts Customer IDs to the customer indices expected by the PyTorch model.
* Add the constant list of offer names as a second output.


In [None]:
class LookupLayer(tf.keras.layers.Layer):
    """Keras layer that translates customer IDs into 0-based customer indices.

    The mapping is fixed at construction time from the module-level ``CLIENTS``
    list: the ID at position ``i`` maps to index ``i``. IDs that are not in
    ``CLIENTS`` are handled by the table's ``CLIENTS_OOV_BUCKETS``
    out-of-vocabulary buckets.
    """

    def __init__(self, **kwargs):
        """Build the static ID -> index table; ``kwargs`` go to the base layer."""
        super().__init__(**kwargs)
        known_ids = tf.constant(CLIENTS, dtype=tf.int64)
        positions = tf.range(len(CLIENTS), dtype=tf.int64)
        id_to_position = tf.lookup.KeyValueTensorInitializer(
            keys=known_ids,
            values=positions,
        )
        self.client_lookup_table = tf.lookup.StaticVocabularyTable(
            id_to_position,
            num_oov_buckets=CLIENTS_OOV_BUCKETS,
        )

    def call(self, inputs):
        """Look up the index of every ID in ``inputs``.

        Args:
            inputs: int64 tensor, shape (batch,) or (batch, ...).

        Returns:
            Tensor of indices with the same shape as ``inputs``.
        """
        return self.client_lookup_table.lookup(inputs)

In [None]:
# Offer names as a constant 1-D string tensor, one entry per element of OFFERS.
offers_tf_tensor = tf.constant(OFFERS, dtype=tf.string)

In [None]:
# One scalar int64 customer ID per example; the batch dimension is implicit.
inputs = tf.keras.Input(shape=(), dtype=tf.int64, name="client_id")
# Customer ID -> 0-based customer index -> per-offer priorities.
indices = LookupLayer()(inputs)
x = keras_model(indices)
# Identity layer only gives the priorities tensor a stable layer name.
main_output = tf.keras.layers.Identity(name="priorities")(x)

def tile_constant(x):
    """Return the offer names repeated once per example in the batch ``x``.

    Only the batch size of ``x`` is used; its values are ignored. No debug
    printing happens here, because this function becomes part of the exported
    serving graph and would otherwise log on every request.
    """
    batch_size = tf.shape(x)[0]
    # (num_offers,) -> (1, num_offers) -> (batch_size, num_offers)
    return tf.tile(tf.expand_dims(offers_tf_tensor, 0), [batch_size, 1])

constant_output = tf.keras.layers.Lambda(tile_constant, name="offer_names")(inputs)

# Dict outputs so that each result is addressable by name.
multiout_model = tf.keras.Model(inputs=inputs, outputs={"priorities": main_output, "offer_names": constant_output})


In [None]:
# Print an overview of the assembled serving model.
multiout_model.summary()

In [None]:
# Inspect the symbolic output tensors of the serving model.
multiout_model.outputs

In [None]:
# Inputs are customer IDs here: CLIENTS holds the multiples of 11, so the
# IDs 11 and 22 resolve to customer indices 1 and 2.
out = multiout_model(tf.constant([11, 22]))   # Customer IDs
print("keras_model output:", out["priorities"])
print("constant offers_tf_tensor:", out["offer_names"])

In [None]:
# Export in TensorFlow SavedModel format into a directory named after the
# current Unix timestamp (whole seconds).
# NOTE(review): the timestamp presumably acts as the numeric model version that
# TF Serving looks for under the model's base path — confirm against the
# serving configuration.
multiout_model.save(f"{SAVED_MODELS_ROOT}/test_model/{int(time.time())}", save_format="tf")