## Install vLLM

In [None]:
! git clone https://github.com/Tostino/vllm.git
! cd vllm
! pip install -e .

## Starting services

In [None]:
# Launch vLLM's OpenAI-style API server for the model directory
# OpenHermes-2.5-Mistral-7B/, with a 32768-token maximum model length and the
# ChatML chat template given by a path relative to the current directory.
# NOTE(review): this command presumably keeps running for as long as the server
# is up, which would block this cell — confirm whether it is meant to be run
# in a separate terminal before executing the request cells.
! python -m vllm.entrypoints.openai.api_server --model OpenHermes-2.5-Mistral-7B/ --max-model-len 32768 --chat-template ./examples/template_chatml.json

In [1]:
import json
import requests  # third-party HTTP client (pip install requests)

# function for generating API requests
def generate_api_request(query, functions=None, *, model="gpt-3.5-turbo-16k-0613",
                         temperature=0.7, max_tokens=150):
    """Build a chat-completions request body that advertises callable functions.

    The function specifications are serialised to JSON and embedded in the
    system message, together with instructions telling the model to answer
    with a ``<functioncall> {...} </functioncall>`` block when it wants to
    call one of them.

    Args:
        query: The user's message text.
        functions: Iterable of JSON-serialisable function specifications.
            ``None`` (the default) or an empty iterable advertises no
            functions.
        model: Value of the ``"model"`` field in the request.
        temperature: Value of the ``"temperature"`` field in the request.
        max_tokens: Value of the ``"max_tokens"`` field in the request.

    Returns:
        A dict with the keys ``"model"``, ``"messages"`` (system message
        followed by the user message), ``"temperature"`` and ``"max_tokens"``.
    """
    # A ``None`` default avoids sharing one mutable list object across calls.
    if functions is None:
        functions = ()
    func_string = "\n\n".join(json.dumps(fn) for fn in functions)
    system_message = {
        "role": "system",
        "content": f"""You are a helpful assistant with access to the following functions:

{func_string}

To use these functions respond with:
<functioncall> {{"name": "function_name", "arguments": {{"arg_1": "value_1", "arg_2": "value_2", ...}}}} </functioncall>

Edge cases you must handle:
- If there are no functions that match the user request, you will respond politely that you cannot help."""
    }
    user_message = {"role": "user", "content": query}

    return {
        "model": model,
        "messages": [system_message, user_message],
        "temperature": temperature,
        "max_tokens": max_tokens,
    }

# function to send an API request and get the response
def send_request(request_payload, url="http://localhost:8000/v1/chat/completions", timeout=60):
    """POST a chat-completions request and return the decoded JSON response.

    Args:
        request_payload: JSON-serialisable request body, e.g. the dict
            returned by ``generate_api_request``.
        url: Full endpoint URL. It must include the scheme (``http://``);
            without one, ``requests`` rejects the URL before connecting.
        timeout: Seconds to wait for the server before giving up, so a
            server that is down or stuck cannot hang the notebook forever.
            Pass ``None`` to wait indefinitely.

    Returns:
        The response body parsed from JSON.

    Raises:
        requests.exceptions.RequestException: If the request cannot be
            completed (connection failure, timeout, invalid URL) or the
            response body is not valid JSON.
    """
    response = requests.post(url, json=request_payload, timeout=timeout)
    return response.json()

# Function specifications offered to the model
# Ride-hailing function; its parameters are described as a flat list of
# {"name", "type", "description"} entries.
call_uber_spec = {
    "name": "call_uber",
    "description": "Find suitable ride for customers given the location, type of ride, and the amount of time the customer is willing to wait as parameters",
    "parameters": [
        {
            "name": "loc",
            "type": "string",
            "description": "location of the starting place of the uber ride",
        },
        {
            "name": "type",
            "type": "string",
            "enum": ["plus", "comfort", "black"],
            "description": "types of uber ride user is ordering",
        },
        {
            "name": "time",
            "type": "number",
            "description": "the amount of time in minutes the customer is willing to wait",
        },
    ],
}

# Weather function; its parameters are described as an object with
# "properties" and "required" entries.
get_current_weather_spec = {
    "name": "get_current_weather",
    "description": "Gets the current weather for a given location",
    "parameters": {
        "type": "object",
        "properties": {
            "location": {
                "type": "string",
                "description": "The city and state, e.g. San Francisco, CA",
            },
            "format": {
                "type": "string",
                "enum": ["celsius", "fahrenheit"],
                "description": "The temperature unit to use. Infer this from the users location.",
            },
        },
        "required": ["location", "format"],
    },
}

# Order matters: the specs are serialised into the system prompt in this order.
functions = [call_uber_spec, get_current_weather_spec]




In [2]:
# Ask a weather question while offering the model every spec in `functions`.
query = "What is the weather in New York?"
request_payload = generate_api_request(query, functions)
response = send_request(request_payload)
# Print the raw response dict; in the captured output below, the model's
# <functioncall> text is under choices[0]["message"]["content"].
print(response)

{'id': 'cmpl-672fbcb175144b41992236d8d0e9dd1c', 'object': 'chat.completion', 'created': 19974236, 'model': 'gpt-3.5-turbo-16k-0613', 'choices': [{'index': 0, 'message': {'role': 'assistant', 'content': '<functioncall>\n  {\n    "name": "get_current_weather",\n    "arguments": {\n      "location": "New York, NY",\n      "format": "celsius"\n    }\n  }\n</functioncall>'}, 'finish_reason': 'stop'}], 'usage': {'prompt_tokens': 374, 'total_tokens': 433, 'completion_tokens': 59}}


In [3]:
query = "Call me an plus Uber in Berkeley at zipcode 94704 in 10 minutes"

# Send the same query with three different function lists, to see whether the
# position of call_uber in the list (or the presence of other functions)
# changes the model's answer. Previously all three requests passed the same
# `functions` list, so the three cases did not actually differ.
uber_fn = next(fn for fn in functions if fn["name"] == "call_uber")
other_fns = [fn for fn in functions if fn["name"] != "call_uber"]
function_variants = [
    [uber_fn, *other_fns],  # call_uber first function in list
    [*other_fns, uber_fn],  # call_uber last function in list
    [uber_fn],              # Only call_uber in list
]

for variant in function_variants:
    prompt = generate_api_request(query, functions=variant)
    response = send_request(prompt)
    print(response)

{'id': 'cmpl-b0a7fc061d3342ea887472aef9192d5e', 'object': 'chat.completion', 'created': 19975695, 'model': 'gpt-3.5-turbo-16k-0613', 'choices': [{'index': 0, 'message': {'role': 'assistant', 'content': '<functioncall> {"name": "call_uber", "arguments": {"loc": "Berkeley, 94704", "type": "plus", "time": 10}} </functioncall>'}, 'finish_reason': 'stop'}], 'usage': {'prompt_tokens': 388, 'total_tokens': 435, 'completion_tokens': 47}}
{'id': 'cmpl-860658086e8b49fc80d943fbb0752887', 'object': 'chat.completion', 'created': 19975696, 'model': 'gpt-3.5-turbo-16k-0613', 'choices': [{'index': 0, 'message': {'role': 'assistant', 'content': '<functioncall>\n  {\n    "name": "call_uber",\n    "arguments": {\n      "loc": "Berkeley 94704",\n      "type": "plus",\n      "time": 10\n    }\n  }\n</functioncall>'}, 'finish_reason': 'stop'}], 'usage': {'prompt_tokens': 388, 'total_tokens': 454, 'completion_tokens': 66}}
{'id': 'cmpl-ca2ae780ce41455db7e9d910e53c7b4f', 'object': 'chat.completion', 'created'