# OpenAI Assistant API with MyScale

Using MyScale as external knowledge base for assistant API

In [None]:
!python3 -m pip install --upgrade openai sentence-transformers

## Load Embedding Model

In [2]:
from sentence_transformers import SentenceTransformer

emb_model = SentenceTransformer('sentence-transformers/paraphrase-multilingual-mpnet-base-v2')

## Connect to MyScale

In [3]:
    
import clickhouse_connect

db = clickhouse_connect.get_client(
    host='msc-4a9e710a.us-east-1.aws.staging.myscale.cloud',
    port=443,
    username='chatdata',
    password='myscale_rocks'
)


## Setting up tools

In [4]:
must_have_cols = ['text', 'title', 'views']
database = 'wiki'
table = 'Wikipedia'

def get_related_pages(subject, where_str, limit):
    q_emb = emb_model.encode(subject).tolist()
    q_emb_str = ",".join(map(str, q_emb))
    if where_str:
        where_str = f"WHERE {where_str}"
    else:
        where_str = ""

    q_str = f"""
        SELECT dist, {','.join(must_have_cols)}
        FROM {database}.{table}
        {where_str}
        ORDER BY distance(emb, [{q_emb_str}]) 
            AS dist ASC
        LIMIT {limit}
        """

    docs = [r for r in db.query(q_str).named_results()]
    return '\n'.join([str(d) for d in docs])

tools = {
    "get_wiki_pages": lambda subject, where_str, limit: get_related_pages(subject, where_str, limit),
}

## Defining tools in Assistant API

In [5]:
from openai import OpenAI

assistant = client.beta.assistants.create(
    name="ChatData",
    instructions=(
        "You are a helpful assistant. Do your best to answer the questions. "
    ),
    tools=[
        {
            "type": "function",
            "function": {
                "name": "get_wiki_pages",
                "description": (
                    "Get some related wiki pages.\n"
                    "You should use schema here to build WHERE string:\n\n"
                    "CREATE TABLE Wikipedia (\n"
                    "    `id` String,\n"
                    "    `text` String, -- abstract of the wiki page. avoid using this column to do LIKE match\n"
                    "    `title` String, -- title of the paper\n"
                    "    `view` Float32,\n"
                    "    `url` String, -- URL to this wiki page\n"
                    "ORDER BY id\n"
                    "You should avoid using LIKE on long text columns."
                ),
                "parameters": {
                    "type": "object",
                    "properties": {
                        "subject": {"type": "string", "description": "a sentence or phrase describes the subject you want to query."},
                        "where_str": {
                            "type": "string",
                            "description": "a sql-like where string to build filter.",
                        },
                        "limit": {"type": "integer", "description": "default to 4"},
                    },
                    "required": ["subject", "where_str", "limit"],
                },
            },
        }
    ],
    model="gpt-3.5-turbo",
)
assistant

Assistant(id='asst_YkfDnV9tt89Kk0rFKbBmVCcv', created_at=1699930778, description=None, file_ids=[], instructions='You are a helpful assistant. Do your best to answer the questions. ', metadata={}, model='gpt-3.5-turbo', name='ChatData', object='assistant', tools=[ToolFunction(function=FunctionDefinition(name='get_wiki_pages', parameters={'type': 'object', 'properties': {'subject': {'type': 'string', 'description': 'a sentence or phrase describes the subject you want to query.'}, 'where_str': {'type': 'string', 'description': 'a sql-like where string to build filter.'}, 'limit': {'type': 'integer', 'description': 'default to 4'}}, 'required': ['subject', 'where_str', 'limit']}, description='Get some related wiki pages.\nYou should use schema here to build WHERE string:\n\nCREATE TABLE Wikipedia (\n    `id` String,\n    `text` String, -- abstract of the wiki page. avoid using this column to do LIKE match\n    `title` String, -- title of the paper\n    `view` Float32,\n    `url` String, -

## Create threads

In [6]:
thread = client.beta.threads.create()
thread

Thread(id='thread_yLrOYHA9OGpGETWus4kpWxnt', created_at=1699930781, metadata={}, object='thread')

## Add messages into the thread

In [7]:
message = client.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content="What is Ring in mathematics? Please query the related documents to answer this.",
)
client.beta.threads.messages.list(thread_id=thread.id)

SyncCursorPage[ThreadMessage](data=[ThreadMessage(id='msg_wn5sZU2b2QDC2rZ1PtrxODJQ', assistant_id=None, content=[MessageContentText(text=Text(annotations=[], value='What is Ring in mathematics? Please query the related documents to answer this.'), type='text')], created_at=1699930783, file_ids=[], metadata={}, object='thread.message', role='user', run_id=None, thread_id='thread_yLrOYHA9OGpGETWus4kpWxnt')], object='list', first_id='msg_wn5sZU2b2QDC2rZ1PtrxODJQ', last_id='msg_wn5sZU2b2QDC2rZ1PtrxODJQ', has_more=False)

## Create run using the thread

The messages attached into the thread will be used as context

In [8]:
run = client.beta.threads.runs.create(
    thread_id=thread.id,
    assistant_id=assistant.id,
    instructions= "You must use query tools to look up relevant information to every answer user's question.",
)
run

Run(id='run_0nGLjRYQDXM4d6IbFY5aSd8p', assistant_id='asst_YkfDnV9tt89Kk0rFKbBmVCcv', cancelled_at=None, completed_at=None, created_at=1699930786, expires_at=1699931386, failed_at=None, file_ids=[], instructions="You must use query tools to look up relevant information to every answer user's question.", last_error=None, metadata={}, model='gpt-3.5-turbo', object='thread.run', required_action=None, started_at=None, status='queued', thread_id='thread_yLrOYHA9OGpGETWus4kpWxnt', tools=[ToolAssistantToolsFunction(function=FunctionDefinition(name='get_wiki_pages', parameters={'type': 'object', 'properties': {'subject': {'type': 'string', 'description': 'a sentence or phrase describes the subject you want to query.'}, 'where_str': {'type': 'string', 'description': 'a sql-like where string to build filter.'}, 'limit': {'type': 'integer', 'description': 'default to 4'}}, 'required': ['subject', 'where_str', 'limit']}, description='Get some related wiki pages.\nYou should use schema here to build

## Retrieve state of this run

- If `completed`, then we just get the final message
- If `action_required`, then we need to call the function and submit output to assistant api
- Otherwise, just keep waiting.

In [9]:
from time import sleep
while True:
    run = client.beta.threads.runs.retrieve(thread_id=thread.id, run_id=run.id)
    if run.status == 'completed':
        print(client.beta.threads.messages.list(thread_id=thread.id))
        break
    elif len(run.required_action.submit_tool_outputs.tool_calls) > 0:
        print("> Action Required <")
        print(run.required_action.submit_tool_outputs.tool_calls)
        break
    sleep(1)

> Action Required <
[RequiredActionFunctionToolCall(id='call_oSJeKLx8rsv13dLzfHJxFiPp', function=Function(arguments='{\n  "subject": "Ring mathematics",\n  "where_str": "title LIKE \'%Ring%\'",\n  "limit": 5\n}', name='get_wiki_pages'), type='function')]


## Calling the tool

In [10]:
import json

tool_calls = run.required_action.submit_tool_outputs.tool_calls
outputs = []
for call in tool_calls:
    func = call.function
    outputs.append({"tool_call_id": call.id, "output": tools[func.name](**json.loads(func.arguments))})
outputs

[{'tool_call_id': 'call_oSJeKLx8rsv13dLzfHJxFiPp',
  'output': '{\'dist\': 0.1616729497909546, \'text\': \'In mathematics, rings are algebraic structures that generalize fields: multiplication need not be commutative and multiplicative inverses need not exist. In other words, a "ring" is a set equipped with two binary operations satisfying properties analogous to those of addition and multiplication of integers. Ring elements may be numbers such as integers or complex numbers, but they may also be non-numerical objects such as polynomials, square matrices, functions, and power series.\', \'title\': \'Ring (mathematics)\', \'views\': 1880.995849609375}\n{\'dist\': 0.17147916555404663, \'text\': \'A ring is a set "R" equipped with two binary operations + (addition) and ⋅ (multiplication) satisfying the following three sets of axioms, called the ring axioms\', \'title\': \'Ring (mathematics)\', \'views\': 1880.995849609375}\n{\'dist\': 0.21737223863601685, \'text\': \'The axioms of a ring

## Submitting the tool output

In [11]:
run = client.beta.threads.runs.submit_tool_outputs(
  thread_id=thread.id,
  run_id=run.id,
  tool_outputs=outputs
)

## Get the message from the thread

In [12]:
messages = client.beta.threads.messages.list(thread_id=thread.id)
messages

SyncCursorPage[ThreadMessage](data=[ThreadMessage(id='msg_0MNiKrSFPfVqTFwXIVwwHnwR', assistant_id='asst_YkfDnV9tt89Kk0rFKbBmVCcv', content=[MessageContentText(text=Text(annotations=[], value='In mathematics, a ring is an algebraic structure that generalizes fields. It is a set equipped with two binary operations, addition and multiplication, satisfying properties analogous to those of addition and multiplication of integers. The elements of a ring can be numbers such as integers or complex numbers, but they can also be non-numerical objects such as polynomials, square matrices, functions, and power series. A ring follows a set of axioms called the ring axioms, which elaborate on the properties of addition and multiplication.\n\nYou can find more information about rings in mathematics on the following Wikipedia page: [Ring (mathematics)](https://en.wikipedia.org/wiki/Ring_(mathematics))\n\nIs there anything else you would like to know?'), type='text')], created_at=1699930798, file_ids=[

## Delete the assistant

You can also just delete this thread if you want to re-use this assistant. Just remember to save its `assistant_id`

In [13]:
client.beta.assistants.delete(assistant_id=assistant.id)

AssistantDeleted(id='asst_YkfDnV9tt89Kk0rFKbBmVCcv', deleted=True, object='assistant.deleted')