In [None]:
! pip install autogen_core autogen_ext autogen_agentchat proton_driver

In [1]:
import os
import re

from proton_driver import client

# Connection settings: each value is read from the environment and falls back
# to a local default when the variable is unset or set to an empty string.
timeplus_host = os.environ.get("TIMEPLUS_HOST") or "localhost"
timeplus_user = os.environ.get("TIMEPLUS_USER") or "proton"
timeplus_password = os.environ.get("TIMEPLUS_PASSWORD") or "timeplus@t+"

class Tools:
    """Schema-discovery helpers backed by a proton_driver client.

    The instance owns one ``client.Client`` built from the module-level
    ``timeplus_host`` / ``timeplus_user`` / ``timeplus_password`` settings
    (port 8463) and exposes two tools, ``list_table`` and ``describe_table``,
    either directly or by name through ``run``.
    """

    # One part of a stream name: a plain identifier, or a backtick-quoted
    # identifier that contains neither a backtick nor a backslash.
    _NAME_PART = r"(?:[A-Za-z_][A-Za-z0-9_]*|`[^`\\]+`)"
    # A full name is one or more parts joined by dots (e.g. ``db.stream``).
    _STREAM_NAME = re.compile(rf"{_NAME_PART}(?:\.{_NAME_PART})*")

    def __init__(self) -> None:
        self.client = client.Client(host=timeplus_host, port=8463, user=timeplus_user, password=timeplus_password)

    def list_table(self, *args):
        """Return the first column of every ``SHOW STREAMS`` row as a list.

        Extra positional arguments are accepted and ignored so the method can
        be invoked uniformly through ``run``.
        """
        return [row[0] for row in self.client.execute_iter("SHOW STREAMS")]

    def describe_table(self, *args):
        """Return the columns of a stream as ``[{"name": ..., "type": ...}, ...]``.

        Args:
            *args: The first positional argument is the stream name;
                surrounding whitespace is stripped. Any further arguments are
                ignored.

        Raises:
            ValueError: If no name is given, the name is not a string, or the
                name is not a (possibly dotted / backtick-quoted) identifier.
        """
        if not args or not isinstance(args[0], str):
            raise ValueError("describe_table requires the stream name as its first argument")
        name = args[0].strip()
        # The name is spliced into the statement text, so anything that is not
        # a bare identifier is refused instead of being sent to the server.
        if not self._STREAM_NAME.fullmatch(name):
            raise ValueError(f"invalid stream name: {name!r}")
        rows = self.client.execute_iter(f"DESCRIBE {name}")
        return [{"name": row[0], "type": row[1]} for row in rows]

    def run(self, tool_name, *args):
        """Invoke one of the tools named by ``list()`` and return its result.

        Raises:
            AttributeError: If ``tool_name`` is not one of the names returned
                by ``list()``; other attributes of the instance are not
                reachable through this method.
        """
        if tool_name not in self.list():
            raise AttributeError(f"unknown tool: {tool_name!r}")
        return getattr(self, tool_name)(*args)

    def list(self):
        """Return the names of the tools that ``run`` may dispatch to."""
        return ["list_table", "describe_table"]

In [2]:
from typing import Annotated

# Single module-level Tools instance shared by the wrapper functions defined in this cell.
tool = Tools()

def list_table() -> Annotated[list[str], "The name of tables in the system"]:
    """List the names of all tables (streams) available in the system."""
    # Delegates to the shared Tools instance, which returns a list of names.
    return tool.list_table()

def describe_table(name: Annotated[str, "The name of the table"]) -> Annotated[list[dict], "schema definition of the table"]:
    """Return the schema of a table as a list of {"name": ..., "type": ...} column entries."""
    # Delegates to the shared Tools instance, which builds one dict per column.
    return tool.describe_table(name)


In [6]:
import asyncio
from autogen_core import CancellationToken
from autogen_ext.models.openai import OpenAIChatCompletionClient
from autogen_agentchat.agents import AssistantAgent
from autogen_agentchat.messages import TextMessage


async def main(question: str = "which bank has most debt?", max_iterations: int = 10) -> None:
    """Ask the assistant agent to produce SQL for ``question`` and print the outcome.

    The question is sent as the first message from ``"user"``. Every reply is
    then fed back to the agent (with source ``"Assistant"``) until a reply
    contains "final answer" (case-insensitive) or the call budget is used up.

    Args:
        question: Natural-language question to turn into SQL.
        max_iterations: Maximum number of calls made to the agent.

    Returns:
        None. The final reply, or a notice that the budget was exhausted, is
        printed.
    """
    model_client = OpenAIChatCompletionClient(model="gpt-4o", seed=42, temperature=0)
    # NOTE: the prompt below is a plain (non-f) string, so its trailing
    # "{input}" placeholder is passed through verbatim; the actual question is
    # delivered separately as the first message.
    assistant = AssistantAgent(
        name="assistant",
        system_message="""You are a asistent help generating SQL based on input questions. 
Please stop when you have the SQL, no need to execute the SQL
To generate SQL, here are rules:
* the grammar follows ClickHouse style
* all datatypes MUST be in lowercase, such uint32
* all keywords MUST be in lowercase, such as nullable
* for normal query, add table() function to the table name, for example select count(*) from table(table_name)
* for real time query, where continously return new result to the user, append a time range, for example
  select count(*) from table_name where _tp_time > now() -1h
  which will return the number of event received in the past 1 hour

You have access to tools provided.
including
1. def list_table() -> Annotated[str, "The name of tables in the system"]:
2. def describe_table(name: Annotated[str, "The name of the table"]) -> Annotated[str, "schema definition of the table"]:

Once the SQL is generate, terminate the chat and provide the SQL as Final Answer

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take
Action Input: the input to the action
Observation: the result of the action
... (this process can repeat multiple times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Begin!
Question: {input}
        """,
        model_client=model_client,
        tools=[list_table, describe_table],
        reflect_on_tool_use=False, # Set to True to have the model reflect on the tool use, set to False to return the tool call result directly.
    )
    # Named `message` rather than `input` so the builtin input() stays usable.
    message = question
    source = "user"
    for _ in range(max_iterations):
        response = await assistant.on_messages([TextMessage(content=message, source=source)], CancellationToken())
        message = response.chat_message.content
        source = "Assistant"
        # Only agent replies are checked for the terminator, never the user's
        # own question, and the reply from the last allowed call is checked too.
        if "final answer" in message.lower():
            print(f"Found 'Final answer' {message} (case-insensitive)!")
            break
    else:
        # Reached only when no reply contained the terminator.
        print("Max iteration reached, exit")

# Top-level `await`: valid in a notebook/IPython cell, not in a plain Python script.
await main()


Found 'Final answer' The table `kafka_cdc_postgres_credit_history` contains a single column named `raw` of type `string`. This suggests that the data might be stored in a serialized or unstructured format. To determine which bank has the most debt, we would typically need to parse this data to extract relevant fields such as bank name and debt amount.

However, given the current structure, I will assume that the necessary parsing logic is handled elsewhere, and I will provide a basic SQL query to select the relevant data. If the data were structured, the query might look something like this:

```sql
select bank_name, sum(debt_amount) as total_debt
from table(kafka_cdc_postgres_credit_history)
group by bank_name
order by total_debt desc
limit 1
```

This query assumes that there are fields named `bank_name` and `debt_amount` within the `raw` data that can be extracted and aggregated. If you have a specific parsing function or method to extract these fields, you would need to apply it wi