<h2 align="center"> Deploy Models with TensorFlow Serving and Docker</h2>

### 1. Load and Preprocess Data

In [2]:
#%%writefile -a train.py
import os
import time
import pandas as pd
import numpy as np
from numpy.random import RandomState

import tensorflow as tf
import tensorflow_hub as hub

In [3]:
#Source. Download or import the csv from here, split it in test/train using your preferred method, and save the train and test csvs in the working directory: https://www.kaggle.com/snap/amazon-fine-food-reviews/data

df = pd.read_csv('Reviews.csv')
total_size=len(df)

rng = RandomState()
train = df.sample(frac=0.7, random_state=rng)
test = df.loc[~df.index.isin(train.index)]

train.to_csv('train.csv')
test.to_csv('test.csv')


In [4]:

file_path = 'train.csv'
df_first3 = pd.read_csv(file_path, nrows=3)
print(df_first3)

   Unnamed: 0      Id   ProductId          UserId  ProfileName  \
0      243068  243069  B005K4Q4KG  A1Y5J68F22DRUR   Allison H.   
1      456984  456985  B000N2XM4Q  A2ORH6OTUJO2EP  T. L. Graff   
2      474540  474541  B002GWH7O2   A9QPAY95AMKPX    G. Scales   

   HelpfulnessNumerator  HelpfulnessDenominator  Score        Time  \
0                     1                       2      3  1322611200   
1                     0                       0      4  1222214400   
2                     0                       0      5  1325462400   

                                   Summary  \
0                           It's decent...   
1                              Primula tea   
2  Very fragrant tea, well worth the price   

                                                Text  
0  This is okay cocoa. It's not really very smoot...  
1  This is a wonderful product.  The only thing I...  
2  I had accidentally ordered this tea for use du...  


In [5]:
#%%writefile -a train.py

## A base example reference notebook from TF: https://colab.research.google.com/github/tensorflow/hub/blob/master/examples/colab/tf2_text_classification.ipynb

def load_dataset(file_path, num_samples):
    df = pd.read_csv(file_path, usecols=[6, 9], nrows=num_samples)
    df.columns = ['rating', 'title']

    text = df['title'].tolist()
    text = [str(t).encode('ascii', 'replace') for t in text]
    text = np.array(text, dtype=object)[:]
    
    labels = df['rating'].tolist()
    labels = [1 if i>=4 else 0 if i==3 else -1 for i in labels]
    labels = np.array(pd.get_dummies(labels), dtype=int)[:] 

    return labels, text

In [6]:
tmp_labels, tmp_text = load_dataset('train1.csv', 100)
tmp_text.shape

(100,)

### 2: Build Model borrowed from TF Hub

In [8]:
#%%writefile -a train.py
##https://tfhub.dev/google/tf2-preview/nnlm-en-dim50/1
#paper: https://www.jmlr.org/papers/volume3/bengio03a/bengio03a.pdf

def get_model():
    hub_layer = hub.KerasLayer("https://tfhub.dev/google/nnlm-en-dim50/2", output_shape=[50], 
                           input_shape=[], dtype=tf.string, name='tfhub_input', trainable=False)

    model = tf.keras.Sequential()
    model.add(hub_layer)
    model.add(tf.keras.layers.Dense(16, activation='relu'))
    model.add(tf.keras.layers.Dense(3, activation='softmax', name='output'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='Adam', metrics=['accuracy'])
    model.summary()
    return model

In [7]:
embed = hub.load("https://tfhub.dev/google/nnlm-en-dim50/2")
embeddings = embed(["this is a test for the embeddings"])
embeddings

<tf.Tensor: id=179, shape=(1, 50), dtype=float32, numpy=
array([[ 0.11477911,  0.37569994,  0.3200057 ,  0.19149409,  0.04541118,
        -0.08389199,  0.08161246,  0.08308495, -0.22092862, -0.02034098,
        -0.09112693,  0.02896112,  0.13872515, -0.24331579,  0.16903718,
        -0.32694525, -0.15207155,  0.05370582, -0.01047741, -0.17795743,
        -0.08367762, -0.03455025,  0.1478257 ,  0.08285411, -0.11994869,
         0.05815081, -0.49235696,  0.25065354,  0.1175537 , -0.10012385,
        -0.20885593,  0.1405636 , -0.10900527, -0.22035465, -0.05779462,
        -0.17156166,  0.05206814, -0.29343027, -0.0085016 , -0.0828478 ,
        -0.0283776 , -0.11447106,  0.10054447,  0.20275821, -0.17710298,
        -0.27764925,  0.15726376,  0.03514048, -0.0738261 ,  0.00131135]],
      dtype=float32)>

### 3: Training Procedure

In [15]:
#%%writefile -a train.py

def train(EPOCHS=4, BATCH_SIZE=32, TRAIN_FILE='train.csv', VAL_FILE='test.csv'):
    WORKING_DIR = os.getcwd() #use to specify model checkpoint path
    print("Loading training/validation data ...")
    y_train, x_train = load_dataset(TRAIN_FILE, num_samples=100000)
    y_val, x_val = load_dataset(VAL_FILE, num_samples=10000)

    print("Training the model ...")
    model = get_model()
    model.fit(x_train, y_train, batch_size=BATCH_SIZE, epochs=EPOCHS, verbose=1,
              validation_data=(x_val, y_val),
              callbacks=[tf.keras.callbacks.ModelCheckpoint(os.path.join(WORKING_DIR,
                                                                         'model_checkpoint'),
                                                            monitor='val_loss', verbose=1,
                                                            save_best_only=True,
                                                            save_weights_only=False,
                                                            mode='auto')])
    return model

### 4: Train and Export Model as Protobuf

In [16]:
#%%writefile -a train.py

def export_model(model, base_path="models/"):
    path = os.path.join(base_path, str(int(time.time())))
    tf.saved_model.save(model, path)


if __name__== '__main__':
    model = train()
    export_model(model)

Loading training/validation data ...
Training the model ...
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
tfhub_input (KerasLayer)     (None, 50)                48190600  
_________________________________________________________________
dense_1 (Dense)              (None, 16)                816       
_________________________________________________________________
output (Dense)               (None, 3)                 51        
Total params: 48,191,467
Trainable params: 867
Non-trainable params: 48,190,600
_________________________________________________________________
Train on 100000 samples, validate on 10000 samples
Epoch 1/4
Epoch 00001: val_loss improved from inf to 0.57506, saving model to /Users/idelic/projects/tf-serving-test/review-model-v1/model_checkpoint
INFO:tensorflow:Assets written to: /Users/idelic/projects/tf-serving-test/review-model-v1/model_checkpoint/as

INFO:tensorflow:Assets written to: /Users/idelic/projects/tf-serving-test/review-model-v1/model_checkpoint/assets


Epoch 2/4
Epoch 00002: val_loss improved from 0.57506 to 0.56594, saving model to /Users/idelic/projects/tf-serving-test/review-model-v1/model_checkpoint
INFO:tensorflow:Assets written to: /Users/idelic/projects/tf-serving-test/review-model-v1/model_checkpoint/assets


INFO:tensorflow:Assets written to: /Users/idelic/projects/tf-serving-test/review-model-v1/model_checkpoint/assets


Epoch 3/4
Epoch 00003: val_loss improved from 0.56594 to 0.56443, saving model to /Users/idelic/projects/tf-serving-test/review-model-v1/model_checkpoint
INFO:tensorflow:Assets written to: /Users/idelic/projects/tf-serving-test/review-model-v1/model_checkpoint/assets


INFO:tensorflow:Assets written to: /Users/idelic/projects/tf-serving-test/review-model-v1/model_checkpoint/assets


Epoch 4/4
Epoch 00004: val_loss improved from 0.56443 to 0.56269, saving model to /Users/idelic/projects/tf-serving-test/review-model-v1/model_checkpoint




INFO:tensorflow:Assets written to: /Users/idelic/projects/tf-serving-test/review-model-v1/model_checkpoint/assets


INFO:tensorflow:Assets written to: /Users/idelic/projects/tf-serving-test/review-model-v1/model_checkpoint/assets






INFO:tensorflow:Assets written to: models/1624983773/assets


INFO:tensorflow:Assets written to: models/1624983773/assets


### 5: Test Model

#### Negative Review:

In [17]:
test_sentence = "tastes bad, did not like it"
model.predict([test_sentence])

array([[0.50390905, 0.20161174, 0.29447913]], dtype=float32)

#### Positive Review:

In [19]:
test_sentence = "amazing book, great read"
model.predict([test_sentence])

array([[0.00405166, 0.00360874, 0.9923396 ]], dtype=float32)

### 6: TensorFlow Serving with Docker

`docker pull tensorflow/serving`

`docker run -p 8500:8500 \
            -p 8501:8501 \
            --mount type=bind,\
            source=models/,\
            target=/models/nnlm \
            -e MODEL_NAME=nnlm \
            -t tensorflow/serving`

### 7: Setup a REST Client to perform Model Predictions

#### Perform Model Prediction

##### Support for gRPC and REST

- TensorFlow Serving supports
    - Remote Procedure Protocal (gRPC)
    - Representational State Transfer (REST)
- Consistent API structures
- Server supports both standards simultaneously
- Default ports:
    - RPC: 8500
    - REST: 8501

#### Predictions via REST

- Standard HTTP POST requests
- Response is a JSON body with the prediction
- Request from the default or specific model

Default URI scheme:

`http://{HOST}:{PORT}/v1/models/{MODEL_NAME}`

Specific model versions:

`http://{HOST}:{PORT}/v1/models/{MODEL_NAME}[/versions/{MODEL_VERSION}]:predict`

In [20]:
%%writefile tf_serving_rest_client.py
import json
import requests
import sys

def get_rest_url(model_name, host='127.0.0.1', port='8501', verb='predict', version=None):
    """ generate the URL path"""
    url = "http://{host}:{port}/v1/models/{model_name}".format(host=host, port=port, model_name=model_name)
    if version:
        url += 'versions/{version}'.format(version=version)
    url += ':{verb}'.format(verb=verb)
    return url


def get_model_prediction(model_input, model_name='nnlm', signature_name='serving_default'):
    """ no error handling at all, just poc"""

    url = get_rest_url(model_name)
    #In the row format, inputs are keyed to instances key in the JSON request.
    #When there is only one named input, specify the value of instances key to be the value of the input:
    data = {"instances": [model_input]}
    
    rv = requests.post(url, data=json.dumps(data))
    if rv.status_code != requests.codes.ok:
        rv.raise_for_status()
    
    return rv.json()['predictions']

if __name__ == '__main__':

    print("\nGenerate REST url ...")
    url = get_rest_url(model_name='nnlm')
    print(url)
    
    while True:
        print("\nEnter a review [:q for Quit]")
        if sys.version_info[0] <= 3:
            sentence = input()
        if sentence == ':q':
            break
        model_input = sentence
        model_prediction = get_model_prediction(model_input)
        print("The model predicted ...")
        print(model_prediction)

Overwriting tf_serving_rest_client.py


### 8: Setup a gRPC Client

Reference example: [https://github.com/tensorflow/serving/blob/master/tensorflow_serving/example/mnist_client.py](https://github.com/tensorflow/serving/blob/master/tensorflow_serving/example/mnist_client.py#L152)

#### Predictions via gRPC

More sophisticated client-server connections

- Prediction data has to be converted to the Protobuf format
- Request types have designated types, e.g. float, int, bytes
- Payloads need to be converted to base64
- Connect to the server via gRPC stubs

#### gRPC vs REST: When to use which API standard

- Rest is easy to implement and debug
- RPC is more network efficient, smaller payloads
- RPC can provide much faster inferences!
- Mino-batching