# gRPC Text Generation Inference with Caikit+TGIS Serving

### Set the Inference server url (replace with your own address)

In [1]:
# Address of the inference server, written as "address:port".
# It is used further down as the target of the secure gRPC channel.
inference_server_url = "your_server_address:port"

In [2]:
# Optional: install the requirements if they are not already present
# (%pip installs into the environment of the running kernel)
#%pip install grpcio grpcio-reflection

### Imports
Note: all proto definition files are under the `pb2` directory

In [3]:
import grpc
from grpc_reflection.v1alpha.proto_reflection_descriptor_database import ProtoReflectionDescriptorDatabase
from google.protobuf.descriptor_pool import DescriptorPool
import sys
sys.path.insert(0, "./pb2")
import nlpservice_pb2_grpc
import nlpservice_pb2

### Create the channel with self-signed certificate

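Note: to extract the certificate chain, you can use the following command. It writes each certificate to its own file (`cert1.pem`, `cert2.pem`, …); save the certificate(s) you need as `certificate.pem`, which is the file read by the next cell: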

`openssl s_client -showcerts -verify 5 -connect your_server_address:port < /dev/null |    awk '/BEGIN CERTIFICATE/,/END CERTIFICATE/{ if(/BEGIN CERTIFICATE/){a++}; out="cert"a".pem"; print >out}'`

In [4]:
# Read the PEM file and hand its bytes to gRPC as the root certificates,
# so that the TLS connection to the server can be verified against them.
with open('certificate.pem', 'rb') as cert_file:
    creds = grpc.ssl_channel_credentials(root_certificates=cert_file.read())

# Open the TLS-secured channel towards the configured inference server.
server_address = inference_server_url
channel = grpc.secure_channel(target=server_address, credentials=creds)

### Optional: Service, methods and parameters discovery

In [5]:
# Query the server through the reflection descriptor database (over the
# channel opened earlier) and show which services it reports.
reflection_db = ProtoReflectionDescriptorDatabase(channel=channel)
services = reflection_db.get_services()
print(f'Available services: {services}')

Available services: ['caikit.runtime.Nlp.NlpService', 'caikit.runtime.Nlp.NlpTrainingService', 'caikit.runtime.training.TrainingManagement', 'grpc.reflection.v1alpha.ServerReflection', 'mmesh.ModelRuntime']


In [6]:
# Look up the NlpService descriptor in a pool backed by the reflection
# database, then print the name of every method it declares.
desc_pool = DescriptorPool(reflection_db)
nlp_service = desc_pool.FindServiceByName('caikit.runtime.Nlp.NlpService')
print('Available methods:')
for rpc_method in nlp_service.methods:
    print(rpc_method.name)

Available methods:
TextClassificationTaskPredict
TextGenerationTaskPredict
ServerStreamingTextGenerationTaskPredict
TokenizationTaskPredict
TokenClassificationTaskPredict
BidiStreamingTokenClassificationTaskPredict


In [7]:
# Inspect the input message of TextGenerationTaskPredict: one line per field,
# giving its name, its numeric protobuf type id and its default value.
# The type ids are the google.protobuf.descriptor.FieldDescriptor TYPE_* constants
# (in the output below: 3 = TYPE_INT64, 8 = TYPE_BOOL, 9 = TYPE_STRING).
# Types reference: https://protobuf.dev/reference/csharp/api-docs/class/google/protobuf/well-known-types/field/types
method_desc = nlp_service.FindMethodByName('TextGenerationTaskPredict')
request_message = method_desc.input_type
for field in request_message.fields:
    print(f'{field.name}, {field.type}, default: {field.default_value}')

text, 9, default: 
preserve_input_text, 8, default: False
max_new_tokens, 3, default: 0
min_new_tokens, 3, default: 0
device, 9, default: 


### Query the service

In [8]:
# Stub initialization to get easy access to the service methods.
# Note: despite its class-like name, the `NlpServiceStub` variable holds a stub
# *instance* bound to `channel`; the cells below call the RPC methods directly on it.
NlpServiceStub = nlpservice_pb2_grpc.NlpServiceStub(channel)

In [9]:
# Additional parameter needed to query the model properly: the model id is sent
# as the "mm-model-id" metadata entry, passed as `metadata=` with each request below.
# NOTE(review): presumably the serving layer uses this entry to route the call
# to the named model - confirm against the server documentation.
model_id = 'Llama-2-7b-chat-hf'
metadata = [("mm-model-id", model_id)]

In [10]:
# Our input: these values populate the TextGenerationTaskRequest fields built in
# the query cells below (`text_input` goes into `text`, the others into the
# fields of the same name).
text_input = 'Where is Paris'
preserve_input_text = False
max_new_tokens = 200
min_new_tokens = 100

In [11]:
# Let's get some answers!
# Build the request from the inputs defined above, then make a single call that
# returns one response object whose generated text is printed in full.
request = nlpservice_pb2.textgenerationtaskrequest__pb2.TextGenerationTaskRequest(
    text=text_input,
    preserve_input_text=preserve_input_text,
    max_new_tokens=max_new_tokens,
    min_new_tokens=min_new_tokens,
)
response = NlpServiceStub.TextGenerationTaskPredict(
    request=request,
    metadata=metadata,
)
print(response.generated_text)

Where is Paris?
Where is Paris located?
Paris is the capital and largest city of France, located in the northern central part of the country. It is situated on the River Seine and is known for its iconic landmarks such as the Eiffel Tower, Notre-Dame Cathedral, and the Louvre Museum.
Paris is located in the Île-de-France region, which is the most populous region in France. The city is situated about 100


### Query the service - Streaming answer

In [12]:
# Let's get some streaming answers!
# Same request as for the single-response call, but sent to the server-streaming
# method: the result is iterated and each piece is printed as it is received.
request = nlpservice_pb2.textgenerationtaskrequest__pb2.TextGenerationTaskRequest(
    text=text_input,
    preserve_input_text=preserve_input_text,
    max_new_tokens=max_new_tokens,
    min_new_tokens=min_new_tokens,
)
response_stream = NlpServiceStub.ServerStreamingTextGenerationTaskPredict(
    request=request,
    metadata=metadata,
)
for response in response_stream:
    # end="" joins the successive pieces instead of printing one per line
    print(response.generated_text, end="")

Where is Paris?
Where is Paris located?
Paris is the capital and largest city of France, located in the northern central part of the country. It is situated on the River Seine and is known for its iconic landmarks such as the Eiffel Tower, Notre-Dame Cathedral, and the Louvre Museum.
Paris is located in the Île-de-France region, which is the most populous region in France. The city is situated about 100