# Example Notebook to show how to use RAPIDS+PyTorch with Triton

This notebook calls an ensemble model that uses RAPIDS+PyTorch with Triton.


<img src="notebook_images/ensemble_rapids_simple.jpg" width="300" height="400">

### Client Setup

In [1]:
# Uncomment and run these once if the Triton client libraries are not
# already installed in this kernel's environment.
# !pip install nvidia-pyindex
# !pip install tritonclient[all]

### Import Libraries

In [2]:
import numpy as np
import json

import grpc
from tritonclient.grpc import service_pb2
from tritonclient.grpc import service_pb2_grpc
import tritonclient.grpc as grpcclient
from functools import partial

### Connect to the Triton End-to-End Model

In [3]:
# Address (host:port) used for every gRPC connection made in this notebook.
url = 'localhost:8001'

# High-level Triton client.
triton_client = grpcclient.InferenceServerClient(url=url, verbose=False)

# Separate raw gRPC channel + service stub, used for protobuf-level requests
# such as the model metadata query.
channel = grpc.insecure_channel(url)
grpc_stub = service_pb2_grpc.GRPCInferenceServiceStub(channel)

In [4]:
# Query the server for the metadata (inputs, outputs, platform) of version 1
# of the ensemble model, using the raw gRPC stub.
preprocessing_model = 'end_to_end_model'
request = service_pb2.ModelMetadataRequest(
    name=preprocessing_model,
    version='1',
)
response = grpc_stub.ModelMetadata(request)
print("model metadata:\n{}".format(response))

model metadata:
name: "end_to_end_model"
versions: "1"
platform: "ensemble"
inputs {
  name: "product_reviews"
  datatype: "BYTES"
  shape: -1
  shape: -1
}
outputs {
  name: "preds"
  datatype: "INT64"
  shape: -1
  shape: -1
}



## Send Request to Model 

### Prepare Input 

In [5]:
# Sample reviews to classify. The request below sends them as a "BYTES"
# tensor, so every string is UTF-8 encoded before being packed into the array.
log_ls = [
    review.encode('utf-8')
    for review in (
        'The product is great',
        'This produce is worst',
        'This product is good',
        'This product is bad',
    )
]

# Single row holding all reviews: shape (1, number_of_reviews).
log_ar = np.array(log_ls).reshape((1, len(log_ls)))

### Request Sending Function

In [6]:
def send_preprocess_requet(log_ar, model_name='end_to_end_model'):
    """Send a batch of encoded reviews to a Triton model and return the result.

    Parameters
    ----------
    log_ar : numpy.ndarray
        Array of encoded reviews. Its shape is forwarded as the shape of the
        ``product_reviews`` input, which is sent with datatype ``BYTES``.
    model_name : str, optional
        Name of the model to query on the server.

    Returns
    -------
    The result object returned by ``InferenceServerClient.infer``; the
    predictions are read from it with ``.as_numpy('preds')``.

    Notes
    -----
    Connects to the module-level ``url``. The client is used as a context
    manager so that the connection opened for this call is closed when the
    call finishes, instead of one open client being left behind per call.
    """
    # Describe the single input tensor and attach the data to it.
    input_grpc = grpcclient.InferInput("product_reviews", log_ar.shape, "BYTES")
    input_grpc.set_data_from_numpy(log_ar)

    # Only the 'preds' output is requested back from the model.
    outputs = [grpcclient.InferRequestedOutput('preds')]

    with grpcclient.InferenceServerClient(url=url, verbose=False) as client:
        return client.infer(model_name=model_name,
                            inputs=[input_grpc],
                            outputs=outputs)

In [7]:
%%time
# Run one inference request; the %%time magic above reports how long it takes.
output = send_preprocess_requet(log_ar)

CPU times: user 1.03 ms, sys: 1.27 ms, total: 2.31 ms
Wall time: 583 ms


##  Predictions

##### 1 is positive, 0 is negative

In [8]:
# Read the 'preds' output tensor out of the inference result.
output.as_numpy('preds')

array([[1, 0, 1, 0]])