## Setup

Change the following variable so that it matches your deployed model's *Inference endpoint*. For example:

```
vllm_endpoint = "https://model-vllm.apps.clusterx.sandboxx.opentlc.com"
```

In [1]:
# Base URL of the deployed model's inference endpoint. The cells below append
# "/v1/..." paths to this value, so it should not end with a slash.
vllm_endpoint = "https://multinode-vllm-vllm-multinode.apps.cluster-rdl66.rdl66.sandbox743.opentlc.com"

## Chat completion with Requests library

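Build and submit the REST request. Note that this example calls the plain text-completion endpoint (`/v1/completions`), not the chat endpoint (`/v1/chat/completions`).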

In [None]:
import requests

def get_model(endpoint, verify=True, timeout=30):
    """Return the id of the first model served at an OpenAI-compatible endpoint.

    Args:
        endpoint: Base URL of the inference server; ``/v1/models`` is
            appended to it. A trailing slash is tolerated.
        verify: TLS verification setting passed through to ``requests``:
            ``True`` validates the server certificate, ``False`` skips
            validation, and a path selects a CA bundle.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``id`` field of the first entry in the response's ``data`` list.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        IndexError: If the server reports no models.
    """
    models_endpoint = f"{endpoint.rstrip('/')}/v1/models"
    response = requests.get(models_endpoint, verify=verify, timeout=timeout)
    # Surface HTTP errors directly instead of failing later with an opaque
    # KeyError or JSON decoding error on the error body.
    response.raise_for_status()
    models = response.json()["data"]
    if not models:
        raise IndexError(f"No models are being served at {models_endpoint}")
    return models[0]["id"]

def completion_request(prompt, model, endpoint, verify=True, timeout=120):
    """Send one prompt to the ``/v1/completions`` endpoint and return the reply.

    Args:
        prompt: Text to complete.
        model: Id of the served model to query.
        endpoint: Base URL of the inference server; ``/v1/completions`` is
            appended to it. A trailing slash is tolerated.
        verify: TLS verification setting passed through to ``requests``.
            Keep ``True`` on RHOAI 2.13 or newer. Older versions use a
            self-signed certificate that is not trusted by default, so pass
            ``False`` (or the path to a CA bundle) there.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON response body. The generated text is found under
        ``["choices"][0]["text"]``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    completion_endpoint = f"{endpoint.rstrip('/')}/v1/completions"
    # No "stop" strings or "stop_token_ids" are sent: generation ends at the
    # model's end-of-sequence token or when max_tokens is reached. Sending
    # schema placeholder values for them would cut the output short.
    json_data = {
        "model": model,
        "prompt": [
            prompt,
        ],
        "max_tokens": 512,
        "temperature": 1,
        "top_p": 1,
        "n": 1,
        "stream": False,
        "logprobs": 0,
        "echo": False,
        "presence_penalty": 0,
        "frequency_penalty": 0,
        "best_of": 1,
        "top_k": -1,
        "ignore_eos": False,
        "use_beam_search": False,
        "skip_special_tokens": True,
        "spaces_between_special_tokens": True,
        "repetition_penalty": 1,
        "min_p": 0,
        "include_stop_str_in_output": False,
        "length_penalty": 1,
    }

    response = requests.post(
        completion_endpoint,
        json=json_data,
        verify=verify,
        timeout=timeout,
    )
    # Surface HTTP errors directly instead of returning an error body that
    # has no "choices" key.
    response.raise_for_status()
    return response.json()

In [None]:
# Look up the id of the first model listed by the endpoint and show it.
model = get_model(vllm_endpoint)
print(model)

In [None]:
# Send a single prompt through the REST helper and print the text of the
# first returned choice.
prediction = completion_request("What is AI?", model, vllm_endpoint)
print(prediction["choices"][0]["text"])

## Chat completion with OpenAI library

In [None]:
!pip install openai

In [4]:
from openai import OpenAI

# The OpenAI client requires an API key value. Use a non-empty placeholder:
# an empty string can yield an invalid "Authorization: Bearer " header with
# some versions of the client. Replace it with a real token if the endpoint
# is protected by token authentication.
client = OpenAI(base_url=f"{vllm_endpoint}/v1", api_key="EMPTY")

In [15]:
def get_model_openai(client):
    """Return the id of the first model in the client's model listing."""
    listing = client.models.list()
    first_model = listing.data[0]
    return first_model.id

def completion_openai(prompt, model, client: OpenAI):
    """Ask the model a single-turn chat question and return the reply text.

    The prompt is sent as one ``user`` message through
    ``client.chat.completions.create`` with fixed sampling settings, and the
    message content of the first returned choice is handed back.
    """
    sampling_options = {
        "temperature": 0.7,
        "max_tokens": 100,
        "top_p": 1,
        "frequency_penalty": 0,
        "presence_penalty": 0,
    }
    user_turn = {"role": "user", "content": prompt}
    reply = client.chat.completions.create(
        model=model,
        messages=[user_turn],
        **sampling_options,
    )
    first_choice = reply.choices[0]
    return first_choice.message.content

In [None]:
# Same lookup through the OpenAI client; the bare expression on the last line
# makes the notebook display the model id.
model = get_model_openai(client)
model

In [None]:
# Ask the question through the chat-completions helper and print the reply.
prompt = "What is AI?"
response = completion_openai(prompt, model, client)
print(response)