# REST Inference

## Setup

### Install the client package

In [None]:
pip install caikit-nlp-client


Change the following variable settings to match your deployed model's *Inference endpoint*. For example:

```
infer_endpoint = "https://flan-t5-small-caikit-predictor-userx-workshop.apps.clusterx.sandboxx.opentlc.com"
```

In [None]:
# Name of the deployed model; sent as "model_id" with every inference request below.
model_id = "flan-t5-small-caikit"
# Base URL of the model's inference endpoint -- replace with the URL of your own deployment.
infer_endpoint = "https://flan-t5-small-caikit-predictor-userx-workshop.apps.<cluster>.com"
# Full URL of the text-generation task route on that endpoint.
infer_url = f"{infer_endpoint}/api/v1/task/text-generation"
# Full URL of the server-streaming text-generation task route on that endpoint.
str_infer_url = f"{infer_endpoint}/api/v1/task/server-streaming-text-generation"

## Using the client library


In [None]:
from caikit_nlp_client import HttpClient

# Create a client bound to the inference endpoint.
# NOTE(review): verify=False presumably turns off TLS certificate verification
# (e.g. for self-signed sandbox certificates) -- confirm before using elsewhere.
http_client = HttpClient(infer_endpoint, verify=False)

# Ask the deployed model to generate text for a single prompt.
text = http_client.generate_text(model_id, "At what temperature does Nitrogen boil?")
# Bare expression at the end of the cell so the notebook displays the result.
text

## Python Request Function

Build and submit the REST request. 

Note: You submit the data in the same format that you used for an ONNX inference.

In [None]:
import requests


def rest_request(data):
    """Send ``data`` to the text-generation route and return the generated text.

    The request body carries the module-level ``model_id`` together with
    ``data`` under the ``"inputs"`` key and is POSTed as JSON to ``infer_url``.

    Args:
        data: Prompt to submit to the model.

    Returns:
        The value stored under ``"generated_text"`` in the JSON response.
    """
    payload = {"model_id": model_id, "inputs": data}
    # verify=False is handed to requests unchanged (no certificate checking).
    reply = requests.post(infer_url, json=payload, verify=False)
    return reply.json()["generated_text"]


def streaming_rest_request(data):
    """Send ``data`` to the server-streaming route and collect the streamed lines.

    The request body carries the module-level ``model_id`` together with
    ``data`` under the ``"inputs"`` key and is POSTed as JSON to
    ``str_infer_url`` with ``stream=True``.

    Args:
        data: Prompt to submit to the model.

    Returns:
        A list with every line yielded by ``response.iter_lines()``, decoded
        as UTF-8, in arrival order.
    """
    json_data = {
        "model_id": model_id,
        "inputs": data,
    }

    # Target the streaming route (str_infer_url), not infer_url, which is the
    # plain text-generation route used by the non-streaming request.
    # The with-block closes the streamed response once the lines are read.
    with requests.post(
        str_infer_url, json=json_data, stream=True, verify=False
    ) as response:
        # Each line could be shown to the user as it arrives; for now they are
        # gathered and returned together at the end.
        return [line.decode("utf-8") for line in response.iter_lines()]

In [None]:
# Submit one prompt through rest_request and show the returned text as the cell output.
prediction = rest_request("At what temperature does Nitrogen boil?")
prediction

In [None]:
# Submit the same prompt through streaming_rest_request; the result is the
# list of decoded response lines, shown as the cell output.
prediction = streaming_rest_request("At what temperature does Nitrogen boil?")
prediction