# Testing Prediction

In [1]:
import json

import pandas as pd

## Sample Data

In [2]:
# Three hand-written sample users, one row each. Every column other than
# "user_id" holds integer values.
df_sample = pd.DataFrame(
    dict(
        user_id=["A", "B", "C"],
        act_exe_num=[0, 1, 3],
        act_med_num=[1, 13, 2],
        act_read_num=[4, 5, 10],
        act_exe_nununique=[0, 1, 2],
        act_med_nununique=[1, 3, 2],
        act_read_nununique=[3, 3, 3],
        task_exe_completed=[0, 0, 0],
        task_med_completed=[0, 0, 1],
        task_read_completed=[0, 1, 1],
        streak_current=[2, 5, 69],
    )
)

## Data Request Preparation

### With Serialize

- Pros: The input data is sent in serialized form, which boosts performance when running inference against the model. See: https://medium.com/@avidaneran/tensorflow-serving-rest-vs-grpc-e8cef9d4ff62
- Cons: Requires the TensorFlow library (>1 GB) just to serialize the data, which is not worth it when the only goal is to make inference requests. Needs more research, or a build containing only the necessary part of TensorFlow.

In [3]:
import base64
import tensorflow as tf

2024-03-14 23:37:56.155453: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-03-14 23:37:56.248050: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-03-14 23:37:56.250586: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [9]:
# Names of the model input columns: every column of the sample frame except the
# "user_id" identifier, in frame order.
list_features = [column for column in df_sample.columns if column != "user_id"]


def int64_feature(value):
    """Wrap a single value in a `tf.train.Feature` backed by a one-element int64 list."""
    int64_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int64_list)


def generate_example(row):
    """Build a `tf.train.Example` from one row of feature values.

    Args:
        row: Object indexable by every name in the module-level `list_features`
            (in this notebook, a row yielded by `DataFrame.iterrows()`).

    Returns:
        A `tf.train.Example` with one int64 feature per name in `list_features`.
    """
    feature_map = {}
    for name in list_features:
        feature_map[name] = int64_feature(row[name])
    return tf.train.Example(features=tf.train.Features(feature=feature_map))

In [10]:
# Build the serialized request inputs as a JSON array string: each sample row is
# turned into a tf.train.Example, serialized, base64-encoded, and wrapped in a
# {"b64": ...} object.
# json.dumps is used instead of manual string concatenation so the result is
# always well-formed JSON, including "[]" when the frame has no rows (the manual
# version only emitted the opening "[" on the first row, leaving a lone "]").
model_inputs = json.dumps(
    [
        {
            "b64": base64.b64encode(
                generate_example(row).SerializeToString()
            ).decode("utf-8")
        }
        for _, row in df_sample.iterrows()
    ]
)

### Without Serialize

In [11]:
# Build the non-serialized request inputs as a JSON object string that maps each
# feature name to the list of that feature's values across all sample rows.
# json.dumps is used instead of interpolating Python list reprs: a repr is only
# valid JSON by coincidence for integer lists (strings would get single quotes,
# booleans would become True/False).
model_inputs_v2 = json.dumps(
    {key: df_sample[key].to_list() for key in list_features}
)

## Get Predictions

In [12]:
# Prediction URL used by every request below (remote service).
# To target a local server instead, use:
#   ENDPOINT = "http://localhost:8080/v1/models/urge-model:predict"
ENDPOINT = "https://tfdf-inference-bu72b6gr6a-uc.a.run.app/v1/models/urge-classifier:predict"

### With Serialize

In [13]:
import requests
import pprint

In [14]:
# Request body for the "predict" signature; `model_inputs` is already a JSON
# string, so it is spliced into the body as-is.
# See: https://github.com/tensorflow/serving/blob/master/tensorflow_serving/g3doc/api_rest.md
data_request = '{"signature_name": "predict", "inputs": %s}' % (model_inputs)
# A timeout keeps an unresponsive endpoint from hanging the cell indefinitely
# (requests waits forever by default).
response = requests.post(ENDPOINT, data=data_request, timeout=60).json()
pprint.pprint(response)

{'outputs': {'logistic': [[0.52228564], [0.57797724], [0.668834031]],
             'logits': [[0.0892016962], [0.314475358], [0.702916265]],
             'probabilities': [[0.47771433, 0.52228564],
                               [0.42202273, 0.57797724],
                               [0.331165969, 0.668834031]]}}


### Without Serialize

In [15]:
# Request body for the "serving_default" signature; `model_inputs_v2` is already
# a JSON string, so it is spliced into the body as-is.
# See: https://github.com/tensorflow/serving/blob/master/tensorflow_serving/g3doc/api_rest.md
data_request = '{"signature_name": "serving_default", "inputs": %s}' % (model_inputs_v2)
# A timeout keeps an unresponsive endpoint from hanging the cell indefinitely
# (requests waits forever by default).
response_v2 = requests.post(ENDPOINT, data=data_request, timeout=60).json()
pprint.pprint(response_v2)

{'outputs': [[0.47771433, 0.52228564],
             [0.42202273, 0.57797724],
             [0.331165969, 0.668834031]]}


## Traffic Simulation for Test Monitoring

In [None]:
import time
import random

# Generate traffic for monitoring tests: loop forever, sending one prediction
# request and one request to the monitoring service per iteration, then sleep a
# random 0.2-2.0 seconds. The loop has no exit condition; interrupt the kernel
# to stop it.
counter = 0
while True:
    print("🚩 ==>", counter)

    data_request = '{"signature_name": "predict", "inputs": %s}' % (model_inputs)
    # See: https://github.com/tensorflow/serving/blob/master/tensorflow_serving/g3doc/api_rest.md
    response = requests.post(ENDPOINT, data=data_request)
    print("Latency ==>", response.elapsed.total_seconds())
    print("Status Code ==>", response.status_code)

    # Bypass idle prometheus as well
    response_mon = requests.get("https://tfdf-monitoring-bu72b6gr6a-uc.a.run.app/")
    # Report the monitoring request's own latency; reading `response.elapsed`
    # here would repeat the prediction request's latency under the wrong label.
    print("Latency Mon ==>", response_mon.elapsed.total_seconds())
    print("Status Code Mon ==>", response_mon.status_code)
    counter += 1
    time.sleep(random.uniform(0.2, 2.0))