In [1]:
import argparse
import subprocess
from functools import partial

import requests
from tensorrtserver.api import grpc_service_pb2

import grpc_gcp_caip_pb2
import grpc_image_client


In [2]:
# Declare the client options and parse a fixed argument list (rather than
# sys.argv) so that FLAGS is available to the cells that follow.
parser = argparse.ArgumentParser()

# Boolean switches; each one is False unless the flag is given.
parser.add_argument('-v', '--verbose', action="store_true", default=False)
parser.add_argument('-a', '--async', dest="async_set", action="store_true")
parser.add_argument('--streaming', action="store_true", default=False)

# Model selection; the model name is the only mandatory option.
parser.add_argument('-m', '--model-name', type=str, required=True)
parser.add_argument('-x', '--model-version', type=int)

# Request shaping.
parser.add_argument('-b', '--batch-size', type=int, default=1)
parser.add_argument('-c', '--classes', type=int, default=1)
parser.add_argument(
    '-s', '--scaling',
    type=str,
    choices=['NONE', 'INCEPTION', 'VGG'],
    default='NONE',
)

# Server address and the optional positional image path.
parser.add_argument('-u', '--url', type=str, default='localhost:8001')
parser.add_argument('image_filename', type=str, nargs='?', default=None)

# Arguments used for this notebook run.
notebook_argv = [
    '-m', 'resnet50_netdef',
    '-u', '35.202.30.17:5000',
    '-s', 'INCEPTION',
    '/workspace/images/mug.jpg',
]
FLAGS = parser.parse_args(args=notebook_argv)


In [3]:
# Create an ingress firewall rule named "triton" on the default network of project
# tsaikevin-inference, allowing TCP port 5000 (the port used in the -u server
# address above) from source range 0.0.0.0/0.
# NOTE(review): 0.0.0.0/0 admits every IPv4 source address - confirm this exposure
# is intended, or narrow --source-ranges to the client's address.
# NOTE(review): this creates a cloud resource; presumably the command fails when the
# rule already exists, so re-running this cell may error - verify before Run All.
!gcloud compute --project=tsaikevin-inference firewall-rules create triton --direction=INGRESS --priority=1000 --network=default --action=ALLOW --rules=tcp:5000 --source-ranges=0.0.0.0/0

Creating firewall...-Created [https://www.googleapis.com/compute/v1/projects/tsaikevin-inference/global/firewalls/triton].
Creating firewall...done.                                                      
NAME    NETWORK  DIRECTION  PRIORITY  ALLOW     DENY  DISABLED
triton  default  INGRESS    1000      tcp:5000        False


In [4]:
# Endpoint that every request in this notebook is POSTed to, built from the
# host:port given by FLAGS.url.
predict_url = "http://{}/v1/models/m/versions/v:predict".format(FLAGS.url)

# A Python string literal is never passed through a shell, so backticks inside it
# are not expanded: the previous header value sent the text
# "gcloud auth print-access-token" verbatim instead of a token. Run the command
# explicitly and use its output as the bearer token.
access_token = subprocess.check_output(
    ["gcloud", "auth", "print-access-token"],
    universal_newlines=True,
).strip()

# NOTE(review): predict_url is plain http, so this token travels unencrypted -
# confirm that is acceptable for this endpoint.
headers = {
  'Content-Type': 'application/json',
  'Authorization': 'Bearer {}'.format(access_token)
}
predict_url

'http://35.202.30.17:5000/v1/models/m/versions/v:predict'

In [5]:
# Ask the server for the status of FLAGS.model_name: build a StatusRequest and wrap
# it in a CaipRequest envelope tagged TYPE_STATUS_REQUEST.
status_request = grpc_service_pb2.StatusRequest(model_name=FLAGS.model_name)
caip_request = grpc_gcp_caip_pb2.CaipRequest(
    request_type=grpc_gcp_caip_pb2.TYPE_STATUS_REQUEST,
    status_request=status_request,
)

# POST the serialized envelope. Raise on an HTTP error status so that an error body
# is never handed to the protobuf parser as if it were a StatusResponse.
http_response = requests.post(predict_url, headers=headers, data=caip_request.SerializeToString())
http_response.raise_for_status()
response = http_response.content

# Decode the raw reply bytes; FromString is called on the message class directly,
# no throwaway instance is needed.
status_response = grpc_service_pb2.StatusResponse.FromString(response)

In [6]:
# Validate the model described by status_response against this client's
# requirements and pull out the properties that preprocessing needs.
# The name `format` rebinds the Python builtin in the notebook namespace; it is
# kept because the following cell refers to it by that name.
(
    input_name,
    output_name,
    c,
    h,
    w,
    format,
    dtype,
) = grpc_image_client.parse_model(
    status_response,
    FLAGS.model_name,
    FLAGS.batch_size,
    FLAGS.verbose,
)

In [7]:
# Pre-bind the model properties and FLAGS to the request generator, leaving only
# the trailing argument (the result_filenames list) to be supplied at call time.
filledRequestGenerator = partial(
    grpc_image_client.requestGenerator,
    input_name, output_name,
    c, h, w,
    format, dtype,
    FLAGS,
)

In [8]:
# Accumulators for the inference loop in the next cell. Requests are sent in
# groups of FLAGS.batch_size images; per the original note, when the image count
# is not an exact multiple of FLAGS.batch_size the batch is filled by starting
# over with the first images.
result_filenames, infer_requests, infer_responses = [], [], []

In [9]:
# Empty the accumulators first so that re-running this cell does not stack a second
# set of entries on top of the previous run. They are cleared in place because
# result_filenames is the very list object handed to the request generator below
# (which is presumably what fills it - it is indexed per request further down).
result_filenames.clear()
infer_responses.clear()

# Wrap each generated inference request in a CaipRequest envelope tagged
# TYPE_INFER_REQUEST, POST it, and decode the reply body as an InferResponse.
for infer_request in filledRequestGenerator(result_filenames):
    caip_request = grpc_gcp_caip_pb2.CaipRequest(
        request_type=grpc_gcp_caip_pb2.TYPE_INFER_REQUEST,
        infer_request=infer_request,
    )
    http_response = requests.post(predict_url, headers=headers, data=caip_request.SerializeToString())
    # Raise on an HTTP error status rather than parsing an error body as protobuf.
    http_response.raise_for_status()
    infer_responses.append(grpc_service_pb2.InferResponse.FromString(http_response.content))

# Report every response together with the filenames recorded for the same request.
for idx, infer_response in enumerate(infer_responses):
    print("Request {}, batch size {}".format(idx, FLAGS.batch_size))
    grpc_image_client.postprocess(infer_response.meta_data.output, result_filenames[idx], FLAGS.batch_size)

Request 0, batch size 1
Image '/workspace/images/mug.jpg':
    504 (COFFEE MUG) = 0.7773650288581848


In [10]:
# Free-form text to send as the trace message of a tracer request.
tracer_text = 'gimme your money'

In [11]:
# Build a CaipRequest of type TYPE_TRACER that carries tracer_text as its
# trace_message, then display the message.
caip_tracer = grpc_gcp_caip_pb2.CaipRequest(
    trace_message=tracer_text,
    request_type=grpc_gcp_caip_pb2.TYPE_TRACER,
)
caip_tracer

request_type: TYPE_TRACER
trace_message: "gimme your money"

In [12]:
# POST the serialized tracer request. Raise on an HTTP error status so that an
# error body is not mistaken for the server's trace reply.
tracer_reply = requests.post(predict_url, headers=headers, data=caip_tracer.SerializeToString())
tracer_reply.raise_for_status()

# Raw reply bytes, displayed as the cell result.
response = tracer_reply.content
response

b'Your trace message was: gimme your money'