In [None]:
!pip install vllm==0.10.2



In [None]:
!pip install transformers accelerate



In [None]:
!pip install litellm



In [None]:
# Start `vllm serve` for Qwen/Qwen3-1.7B in the background (nohup ... &); output is written to vllm.log.
!nohup vllm serve Qwen/Qwen3-1.7B \
  --enable-auto-tool-choice \
  --tool-call-parser hermes \
  --reasoning-parser deepseek_r1 \
  > vllm.log &

nohup: redirecting stderr to stdout


In [None]:
!tail -n 20 vllm.log # show the last 20 lines of vllm.log; the server needs about 2-3 minutes to be ready -> Application startup complete

[1;36m(APIServer pid=30069)[0;0m INFO 11-22 15:26:37 [launcher.py:44] Route: /v1/responses/{response_id}/cancel, Methods: POST
[1;36m(APIServer pid=30069)[0;0m INFO 11-22 15:26:37 [launcher.py:44] Route: /v1/chat/completions, Methods: POST
[1;36m(APIServer pid=30069)[0;0m INFO 11-22 15:26:37 [launcher.py:44] Route: /v1/completions, Methods: POST
[1;36m(APIServer pid=30069)[0;0m INFO 11-22 15:26:37 [launcher.py:44] Route: /v1/embeddings, Methods: POST
[1;36m(APIServer pid=30069)[0;0m INFO 11-22 15:26:37 [launcher.py:44] Route: /pooling, Methods: POST
[1;36m(APIServer pid=30069)[0;0m INFO 11-22 15:26:37 [launcher.py:44] Route: /classify, Methods: POST
[1;36m(APIServer pid=30069)[0;0m INFO 11-22 15:26:37 [launcher.py:44] Route: /score, Methods: POST
[1;36m(APIServer pid=30069)[0;0m INFO 11-22 15:26:37 [launcher.py:44] Route: /v1/score, Methods: POST
[1;36m(APIServer pid=30069)[0;0m INFO 11-22 15:26:37 [launcher.py:44] Route: /v1/audio/transcriptions, Methods: POST
[1;36

In [None]:
import sqlite3
# Path of the SQLite database file; reused later by run_query(db_name, ...).
db_name = "shakespeare.sqlite"
# Notebook-wide connection read by query_db and get_schema.
# NOTE(review): presumably the file must already exist in the working directory — confirm before running.
conn = sqlite3.connect(db_name)

In [None]:
import pandas as pd

def query_db(sql: str):
    """
    Run a SQL statement on the notebook-level connection `conn`.

    Args:
        sql: The SQL text to execute.

    Returns:
        A list of row dictionaries (one per result row, keyed by column name).
        If anything goes wrong, a dictionary {"error": <message>} is returned
        instead of raising, so the failure can be reported back as a tool result.
    """
    try:
        records = pd.read_sql_query(sql, conn).to_dict(orient="records")
    except Exception as exc:
        return {"error": str(exc)}
    return records

In [None]:
def get_schema(connection):
    """Return the CREATE TABLE statements of all user tables in the database.

    Args:
        connection: An open SQLite connection exposing ``cursor()``.

    Returns:
        A string with one CREATE statement per table, each followed by a
        semicolon and a newline, in the order sqlite_master returns them.
        Tables whose name starts with "sqlite_" are skipped. The string is
        empty when the database has no user tables.
    """
    cursor = connection.cursor()
    try:
        # Name and DDL are read in one statement, so no table name is ever
        # interpolated into SQL (names containing quotes used to break the
        # per-table lookup) and no extra query per table is needed.
        cursor.execute("SELECT name, sql FROM sqlite_master WHERE type='table';")
        tables = cursor.fetchall()
    finally:
        cursor.close()

    return "".join(
        f"{create_statement};\n"
        for table_name, create_statement in tables
        # Skip SQLite's internal tables and rows that carry no DDL text.
        if not table_name.startswith("sqlite_") and create_statement
    )

In [None]:
# Schema text (CREATE TABLE statements) that is embedded in the user prompt.
db_schema = get_schema(conn)

In [None]:
def get_function_by_name(name):
    """Map a tool name to its Python callable; unknown names yield None."""
    return query_db if name == "query_db" else None

# Tool specification passed to the model via the "tools" completion parameter.
# It declares a single function, `query_db`, taking one required string argument `sql`.
TOOLS = [
    {
        "type": "function",
        "function": {
            "name": "query_db",
            "description": "Executes a single SQL query on the database. Use this to make exploratory queries in order to find the final answer.",
            "parameters": {
                "type": "object",
                "properties": {
                    "sql": {
                        "type": "string",
                        "description": 'The SQL query to execute on the database.',
                    }
                },
                "required": ["sql"],
            },
        },
    }
]

In [None]:
# User turn: the database schema (from db_schema), an "Evidence" hint and the question to answer.
USER_QUESTION = f"""
Database Schema:
{db_schema}
Evidence:
character "Shylock" refers to CharName = 'Shylock'
Question:
Give the title of the work that contains the character 'Shylock'.
"""



# System turn: instructs the model to explore with `query_db` when needed and to return
# only the final SQL wrapped in <answer>...</answer> (the agent loop extracts it with a regex).
SYSTEM_PROMPT = """You are a SQL expert. Your task is to answer the user’s question by generating a final SQL query.
The schema of the database is provided in the user question.
You have access to a tool called `query_db` that you can use to run exploratory queries on the database. Use it only if needed to check values and validate assumptions.
When you perform exploratory queries, use LIMIT 3 if the output of the query may have more than 3 rows.
Eventually, provide ONLY the final SQL query enclosed within the tags <answer> and </answer>, not the answer to the query.
"""

In [None]:
from litellm import completion
import json
import re

# Final SQL predicted by the model. While the model is still exploring, this holds
# the most recent non-empty exploratory query as a fallback, and it is replaced by
# the content of <answer>...</answer> once the model gives its final message.
pred_query = None
model_name = "hosted_vllm/Qwen/Qwen3-1.7B"
MAX_TURNS = 10  # a maximum number of turns in order to avoid infinite loops

# Conversation history; every model reply and every tool result is appended to it.
messages = [
    {"role": "system", "content": SYSTEM_PROMPT},
    {"role": "user", "content": USER_QUESTION}
]

# Keyword arguments forwarded unchanged to every completion() call.
completion_params = {
    "model": model_name,
    "temperature": 0.6,
    "top_p": 0.95,
    "max_tokens": 4096,
    "repetition_penalty": 1.05,
    "top_k": 20,
    "min_p": 0,

    "extra_body": {
        "chat_template_kwargs": {
            "enable_thinking": True,
        }
    },
    "api_base": "http://127.0.0.1:8000/v1",
    "tools": TOOLS
}

print(USER_QUESTION)

for turn in range(MAX_TURNS):

    # call the model
    response = completion(messages=messages, **completion_params)
    response_message = response.choices[0].message

    # add the model's response (thought + tool_call) to the history
    messages.append(response_message.model_dump())

    # check if the model has called a tool
    tool_calls = response_message.get("tool_calls", None)
    if tool_calls:
        # execution of the tool call
        for tool_call in tool_calls:
            call_id = tool_call["id"]
            if fn_call := tool_call.get("function"):
                fn_name = fn_call["name"]
                sql_query = ""
                try:
                    fn_args = json.loads(fn_call["arguments"])
                    if not isinstance(fn_args, dict):
                        raise TypeError("tool arguments must be a JSON object")
                    sql_query = fn_args.get("sql", "")
                    tool_fn = get_function_by_name(fn_name)
                    if tool_fn is None:
                        raise ValueError(f"unknown tool: {fn_name}")
                    fn_res = tool_fn(**fn_args)
                except (TypeError, ValueError) as e:
                    # Malformed JSON arguments (JSONDecodeError is a ValueError), an
                    # unknown tool name or unexpected argument names are reported
                    # back to the model as a tool error instead of aborting the run.
                    fn_res = {"error": str(e)}

                # Remember the last real exploratory query as a fallback prediction;
                # an empty or non-string "sql" must not wipe a previous one.
                if isinstance(sql_query, str) and sql_query.strip():
                    pred_query = sql_query.strip()

                # add the tool result (as JSON) to the history for the model;
                # default=str keeps non-JSON values (e.g. BLOB bytes) from raising
                messages.append({
                    "role": "tool",
                    "content": json.dumps({
                        "query_executed": sql_query,
                        "result": fn_res
                    }, default=str),
                    "tool_call_id": call_id,
                })

    # Check if the model is finished (gave a final answer without tool calls)
    elif response_message.content:
        final_output = response_message.content.strip()
        # Extract the text between the <answer> tags
        match = re.search(r"<answer>\s*(.*?)\s*</answer>", final_output, re.DOTALL)
        if match:
            pred_query = match.group(1).strip()
            print(f"predicted query:\n{pred_query}")
        else:
            print("No <answer> tags found in final_output.")

        break
else:
    # The loop ran out of turns without the model giving a final message.
    print(f"No final answer after {MAX_TURNS} turns.")


conn.close()


Database Schema:
CREATE TABLE "chapters"
(
    id          INTEGER
        primary key autoincrement,
    Act         INTEGER not null,
    Scene       INTEGER not null,
    Description TEXT    not null,
    work_id     INTEGER not null
        references works
);
CREATE TABLE "characters"
(
    id          INTEGER
        primary key autoincrement,
    CharName    TEXT not null,
    Abbrev      TEXT not null,
    Description TEXT not null
);
CREATE TABLE "paragraphs"
(
    id           INTEGER
        primary key autoincrement,
    ParagraphNum INTEGER           not null,
    PlainText    TEXT              not null,
    character_id INTEGER           not null
        references characters,
    chapter_id   INTEGER default 0 not null
        references chapters
);
CREATE TABLE "works"
(
    id        INTEGER
        primary key autoincrement,
    Title     TEXT    not null,
    LongTitle TEXT    not null,
    Date      INTEGER not null,
    GenreType TEXT    not null
);

Evidence:
mos

In [None]:
messages

[{'role': 'system',
  'content': 'You are a SQL expert. Your task is to answer the user’s question by generating a final SQL query.\nThe schema of the database is provided in the user question.\nYou have access to a tool called `query_db` that you can use to run exploratory queries on the database. Use it only if needed to check values and validate assumptions.\nWhen you perform exploratory queries, use LIMIT 3 if the output of the query may have more than 3 rows.\nEventually, provide ONLY the final SQL query enclosed within the tags <answer> and </answer>, not the answer to the query.\n'},
 {'role': 'user',
  'content': '\nDatabase Schema:\nCREATE TABLE "chapters"\n(\n    id          INTEGER\n        primary key autoincrement,\n    Act         INTEGER not null,\n    Scene       INTEGER not null,\n    Description TEXT    not null,\n    work_id     INTEGER not null\n        references works\n);\nCREATE TABLE "characters"\n(\n    id          INTEGER\n        primary key autoincrement,\n 

In [None]:
# Single-line transcript; assigned after the loop over `messages` below.
output_string = ""
# Ordered transcript fragments ([STEP] / [CALL] / [ANS]) collected from `messages`.
temp_parts = []

def clean_and_format(text):
    """Flatten text to one line: double quotes become single quotes and every
    run of whitespace (newlines included) collapses to a single space.

    Returns an empty string when `text` is None.
    """
    if text is None:
        return ''
    single_quoted = text.replace('"', "'")
    # split() with no argument breaks on any whitespace, newlines included,
    # and drops leading/trailing blanks, so one join normalizes everything.
    return ' '.join(single_quoted.split())

# Walk through the messages in chronological order
for message in messages:

    # Extract the reasoning text: look in provider_specific_fields first, then
    # fall back to a top-level reasoning_content entry. The field may be
    # present but set to None, so it is normalized to an empty dict.
    provider_fields = message.get('provider_specific_fields') or {}
    reasoning = provider_fields.get('reasoning_content') or message.get('reasoning_content') or ""

    if reasoning:
        temp_parts.append("[STEP]" + clean_and_format(reasoning))

    # One [CALL] entry per tool call issued in this message
    for tool_call in message.get('tool_calls') or []:
        try:
            args = json.loads(tool_call['function']['arguments'])
            sql_query = args.get('sql', 'N/A')
            temp_parts.append("[CALL] " + clean_and_format(sql_query))
        except (json.JSONDecodeError, TypeError, AttributeError):
            # Arguments that are missing, not valid JSON, not a JSON object, or
            # carry a non-string "sql" are all reported the same way.
            temp_parts.append("[CALL] Error")

    # One [ANS] entry per tool result
    if message.get('role') == 'tool':
        raw_content = message.get('content')
        content_str = ('{}' if raw_content is None else raw_content).strip()
        try:
            content_json = json.loads(content_str)
            result = content_json.get('result', 'Nessun Risultato Trovato')
            result_str = json.dumps(result)
            temp_parts.append("[ANS] " + clean_and_format(result_str))
        except (json.JSONDecodeError, AttributeError):
            temp_parts.append("[ANS] Errore nel parsing della risposta del tool: " + clean_and_format(content_str))


output_string = " ".join(temp_parts).strip()

print(output_string)

[STEP]Okay, let's tackle this problem. The user wants to know which chapter has the most paragraphs and its description. First, I need to understand the tables involved. There's the chapters table, which has chapter_id, Act, Scene, Description, and work_id. Then there's the paragraphs table, which has paragraph_id, ParagraphNum, PlainText, character_id, and chapter_id. The characters table is for character info, but maybe not directly needed here. The question is about finding the chapter with the maximum number of paragraphs. So, I need to count the number of paragraphs per chapter. But wait, each paragraph is associated with a chapter via chapter_id. So, I should join the chapters and paragraphs tables on chapter_id. But how do I get the count of paragraphs per chapter? Let's think. For each chapter, count the number of paragraphs it has. Then find the chapter with the highest count. So the plan is: join chapters and paragraphs on chapter_id, group by chapter_id, count the paragraphs

In [None]:
def compute_execution_accuracy(gt_results, predict_results):
    """Percentage of queries whose predicted result set equals the reference one.

    Args:
        gt_results: List of dicts, each with a 'results' list of hashable
            values (the reference rows of one query).
        predict_results: List with the same layout, aligned by position with
            `gt_results`.

    Returns:
        The accuracy as a float between 0 and 100. Results are compared as
        sets, so row order and duplicate rows are ignored. Returns 0.0 when
        `gt_results` is empty (instead of dividing by zero).
    """
    num_queries = len(gt_results)
    if num_queries == 0:
        return 0.0

    num_correct = sum(
        set(gt['results']) == set(predict_results[i]['results'])
        for i, gt in enumerate(gt_results)
    )

    return (num_correct / num_queries) * 100

In [None]:
def run_query(db_path, query):
    """Run `query` on the SQLite database at `db_path` and return the first column.

    A fresh connection is opened for the call and always closed again, even
    when the query fails.

    Args:
        db_path: Path of the SQLite database file.
        query: SQL text to execute.

    Returns:
        A list holding the first column of every result row, in result order.

    Raises:
        sqlite3.Error: Propagated unchanged when the query cannot be executed.
    """
    connection = sqlite3.connect(db_path)
    try:
        rows = connection.execute(query).fetchall()
    finally:
        # Close on the error path too, so a bad query does not leak the handle.
        connection.close()

    # Flatten results: keep only the first column of each row
    return [row[0] for row in rows]

In [None]:
# Reference (ground-truth) SQL for the Shylock question; its result is compared with pred_query's.
gt_query = """SELECT DISTINCT w.Title FROM characters c JOIN paragraphs p ON c.id = p.character_id JOIN chapters ch ON p.chapter_id = ch.id JOIN works w ON ch.work_id = w.id WHERE c.CharName = 'Shylock';"""

In [None]:
pred_query

'SELECT chapters.Description\nFROM chapters\nJOIN paragraphs ON chapters.id = paragraphs.chapter_id\nGROUP BY chapters.id\nORDER BY COUNT(paragraphs.id) DESC\nLIMIT 1;'

In [None]:
# Execute the reference query and wrap its rows in the shape read by
# compute_execution_accuracy: a list with one {"results": [...]} dict per query.
rows_gt = run_query(db_name, gt_query)
gt_res = [{"results": rows_gt}]

# Same for the query predicted by the model.
rows_pred = run_query(db_name, pred_query)
pred_res = [{"results": rows_pred}]

In [None]:
# Only one query pair is evaluated here, so the score is either 0.0 or 100.0.
acc = compute_execution_accuracy(gt_res, pred_res)
print(f"Accuracy of the generated SQL query: {acc}")

Accuracy of the generated SQL query: 100.0
