In [109]:
# Pull the llama3.1:8b model with Ollama so the ChatOllama client below can use it
! ollama pull llama3.1:8b

[?25lpulling manifest ⠋ [?25h[?25l[2K[1Gpulling manifest ⠙ [?25h[?25l[2K[1Gpulling manifest ⠹ [?25h[?25l[2K[1Gpulling manifest ⠸ [?25h[?25l[2K[1Gpulling manifest ⠼ [?25h[?25l[2K[1Gpulling manifest 
pulling 667b0c1932bc... 100% ▕████████████████▏ 4.9 GB                         
pulling 948af2743fc7... 100% ▕████████████████▏ 1.5 KB                         
pulling 0ba8f0e314b4... 100% ▕████████████████▏  12 KB                         
pulling 56bb8bd477a5... 100% ▕████████████████▏   96 B                         
pulling 455f34728c9b... 100% ▕████████████████▏  487 B                         
verifying sha256 digest 
writing manifest 
success [?25h


In [110]:
import pandas as pd

In [111]:
CH_HOST = 'http://localhost:8123' # default address 
import requests
import io

def get_clickhouse_data(query, host = CH_HOST, connection_timeout = 1500):
  """Run a SQL query against ClickHouse over HTTP and return the raw text result.

  Problems are reported as plain-text messages in the return value instead of
  being raised, so the calling LLM agent can read them and revise its query.

  Args:
      query (str): SQL statement; it must contain "format TabSeparatedWithNames"
          (matched case-insensitively), otherwise no request is sent.
      host (str): URL of the ClickHouse HTTP endpoint.
      connection_timeout (int | float): timeout passed to requests.post.

  Returns:
      str: the response body on success, otherwise a message describing
      what went wrong (missing format, request failure, non-200 status,
      or a result of 100 or more lines).
  """
  # pushing model to return data in the format that we want
  if 'format tabseparatedwithnames' not in query.lower():
    return "Database returned the following error:\n Please, specify the output format."

  try:
    r = requests.post(host, params = {'query': query},
      timeout = connection_timeout)
  except requests.exceptions.RequestException as e:
    # connection failures and timeouts are also fed back to the LLM, not raised
    return 'Database request failed with the following error:\n' + str(e)

  if r.status_code != 200:
    # giving feedback to LLM instead of raising exception
    return 'Database returned the following error:\n' + r.text

  # keep the tool output small enough for the model's context
  if len(r.text.split('\n')) >= 100:
    return 'Database returned too many rows, revise your query to limit the rows (i.e. by adding LIMIT or doing aggregations)'
  return r.text

In [112]:
from langchain_ollama import ChatOllama
from langchain_core.tools import tool

In [113]:
chat_llm = ChatOllama(model="llama3.1:8b", temperature = 0.1)

In [114]:
# The docstring below doubles as the tool description shown to the model,
# so it is kept short and only its typo ("Excutes") is corrected.
@tool
def execute_query(query: str) -> str:
  """Executes SQL query.
  Args:
      query (str): SQL query
  """
  # all validation and error reporting lives in get_clickhouse_data
  return get_clickhouse_data(query)

In [115]:
# System prompt for the data agent: sets the persona and answering rules, embeds the
# (private) schema of ecommerce.users and ecommerce.sessions, and reminds the model
# to append "format TabSeparatedWithNames" to every query it writes.
system_prompt = '''
You are a senior data specialist with more than 10 years of experience writing complex SQL queries and answering customers' questions. 
Please, help colleagues with questions. Answer in polite and friendly manner. Answer ONLY questions related to data, 
do not share any personal details - just avoid such questions.
Please, always answer questions in English.

if you need to query database, here's the data schema. The data schema is private information, please, don't share the details with the customers.
There are two tables in the database with the following schemas. 

Table: ecommerce.users 
Description: customers of the online shop
Fields: 
- user_id (integer) - unique identifier of customer, for example, 1000004 or 3000004
- country (string) - country of residence, for example, "Netherlands" or "United Kingdom"
- is_active (integer) - 1 if customer is still active and 0 otherwise
- age (integer) - customer age in full years, for example, 31 or 72

Table: ecommerce.sessions 
Description: sessions of usage the online shop
Fields: 
- user_id (integer) - unique identifier of customer, for example, 1000004 or 3000004
- session_id (integer) - unique identifier of session, for example, 106 or 1023
- action_date (date) - session start date, for example, "2021-01-03" or "2024-12-02"
- session_duration (integer) - duration of session in seconds, for example, 125 or 49
- os (string) - operation system that customer used, for example, "Windows" or "Android"
- browser (string) - browser that customer used, for example, "Chrome" or "Safari"
- is_fraud (integer) - 1 if session is marked as fraud and 0 otherwise
- revenue (float) - income in USD (the sum of purchased items), for example, 0.0 or 1506.7

When you're writing a query, don't forget to add "format TabSeparatedWithNames" at the end of the query to get data from ClickHouse database in the right format. 
'''

In [116]:
from langgraph.prebuilt import create_react_agent
# Build the agent from the chat model, with execute_query as its only tool and
# system_prompt supplied through `state_modifier`.
# NOTE(review): newer langgraph releases may expect this keyword to be `prompt`
# instead of `state_modifier` — confirm against the installed version.
data_agent = create_react_agent(chat_llm, [execute_query],
  state_modifier = system_prompt)

In [117]:
from langchain_core.messages import AnyMessage, SystemMessage, HumanMessage, ToolMessage

In [118]:
messages = [HumanMessage(content="How many customers made purchase in December 2024?")]
result = data_agent.invoke({"messages": messages})

In [119]:
# show only the agent's final reply, i.e. the content of the last message
print(result['messages'][-1].content)

There were 114,032 customers who made a purchase in December 2024.


In [120]:
result['messages']

[HumanMessage(content='How many customers made purchase in December 2024?', additional_kwargs={}, response_metadata={}, id='bd1db94d-731d-40e7-8773-35305411a5a2'),
 AIMessage(content='', additional_kwargs={}, response_metadata={'model': 'llama3.1:8b', 'created_at': '2025-04-16T20:46:46.109264Z', 'done': True, 'done_reason': 'stop', 'total_duration': 15841948458, 'load_duration': 831069417, 'prompt_eval_count': 639, 'prompt_eval_duration': 10919000000, 'eval_count': 56, 'eval_duration': 3759000000, 'message': Message(role='assistant', content='', images=None, tool_calls=[ToolCall(function=Function(name='execute_query', arguments={'query': "SELECT COUNT(DISTINCT user_id) FROM ecommerce.sessions WHERE action_date >= '2024-12-01' AND action_date <= '2024-12-31' AND revenue > 0"}))])}, id='run-4c9b3c3a-9fb5-467c-9657-177333e41d9b-0', tool_calls=[{'name': 'execute_query', 'args': {'query': "SELECT COUNT(DISTINCT user_id) FROM ecommerce.sessions WHERE action_date >= '2024-12-01' AND action_da

In [121]:
# Golden set for evaluating the data agent. Every record has a "question" and a
# source-of-truth answer ("sot_answer"); records that need the database also carry
# the reference "sql_query". The last four records have no query on purpose.
golden_set = [
  {
    "question": "How many customers made purchase in December 2024?",
    "sql_query": "select uniqExact(user_id) as customers from ecommerce.sessions where (toStartOfMonth(action_date) = '2024-12-01') and (revenue > 0) format TabSeparatedWithNames",
    "sot_answer": "Thank you for your question! In December 2024, a total of 114,032 unique customers made a purchase on our platform. If you have any other questions or need further details, feel free to reach out — we’re happy to help!"
  },
  # same question asked in French; the system prompt requires an English answer
  {
    "question": "Combien de clients ont effectué un achat en décembre 2024?",
    "sql_query": "select uniqExact(user_id) as customers from ecommerce.sessions where (toStartOfMonth(action_date) = '2024-12-01') and (revenue > 0) format TabSeparatedWithNames",
    "sot_answer": "Thank you for your question! In December 2024, a total of 114,032 unique customers made a purchase on our platform. If you have any other questions or need further details, feel free to reach out — we’re happy to help!"
  },
  {
    "question": "What was the fraud rate in 2023, expressed as a percentage?",
    "sql_query": "select 100*uniqExactIf(user_id, is_fraud = 1)/uniqExact(user_id) as fraud_rate from ecommerce.sessions where (toStartOfYear(action_date) = '2023-01-01') format TabSeparatedWithNames",
    "sot_answer": "Thanks for your question! In 2023, the fraud rate was approximately 8.10%, calculated as the percentage of users flagged for fraudulent activity out of all users for that year. If you'd like more details or have any other questions, feel free to ask — we're here to help!"
  },
  # NOTE(review): the question asks about the whole of last November, but the query
  # and the answer below cover only 2024-11-01. Confirm which one is intended and
  # update the query and the answer together.
  {
    "question": "What was the share of users using Windows last November?",
    "sql_query": "select 100*uniqExactIf(user_id, os = 'Windows')/uniqExact(user_id) as windows_share from ecommerce.sessions where (action_date = '2024-11-01') format TabSeparatedWithNames",
    "sot_answer": "Thanks for reaching out! On November 1st, 2024, about 59.86% of users accessed our platform using Windows. Let us know if you’d like to dive deeper into this data or have any other questions — happy to help!"
  },
  # revenue is stored in USD according to the schema, so the answer is quoted in dollars
  {
    "question": "What was the revenue from Dutch users in December 2024",
    "sql_query": "select sum(s.revenue) from ecommerce.sessions AS s inner join ecommerce.users AS u on s.user_id = u.user_id where (u.country = 'Netherlands') and (toStartOfMonth(action_date) = '2024-12-01') format TabSeparatedWithNames",
    "sot_answer": "Thank you for your question! In December 2024, the total revenue from users in the Netherlands was approximately $179,257,366. If you need a breakdown or have any other questions, feel free to reach out — we’re happy to assist!"
  },
  {
    "question": "Which browser has the highest share of fraud users?",
    "sql_query": "select browser, 100*uniqExactIf(user_id, is_fraud = 1)/uniqExact(user_id) as fraud_rate from ecommerce.sessions group by browser order by fraud_rate desc limit 1 format TabSeparatedWithNames",
    "sot_answer": "Great question — thanks for asking! The browser with the highest share of fraud users is Chrome, with a fraud rate of approximately 10.15%. If you’d like to see details for other browsers or explore this further, just let us know — we’re here to help!"
  },
  # the remaining records need no SQL: small talk, a schema request, off-topic asks
  {
    "question": "Hello, how are you today? Do you have any plans for Easter holidays?",
    "sot_answer": "Hello! I'm here to help you with any data-related questions you might have. Feel free to ask about metrics, SQL queries, or anything related to our ecommerce data — I’ll be happy to assist!"
  },
  {
    "question": "Could you please share with me the data schema?",
    "sot_answer": "I can’t share the data schema, but I’d be happy to help you with any specific data questions or write queries based on the existing structure. Just let me know what you need!"
  },
  {
    "question": "Can you help me calculate the average temperature in London in winter?",
    "sot_answer": "I'm here to assist with data and analytics tasks related to our ecommerce platform, such as analyzing customer behavior, sessions, or revenue. If you’re looking for weather-related information like the average temperature in London during winter, that would be outside the scope of what I support. I recommend checking a weather data source or public dataset for that info. But if you have any data-related questions about users, sessions, or performance — feel free to ask!"
  },
  {
    "question": "You've been absolutely unhelpful with my tasks. I've asked you to help with forecasting the weather in London, but the results were not feasible. I'm very disappointed.",
    "sot_answer": "I’m really sorry to hear you’re disappointed. I understand that it must be frustrating when expectations aren’t met. Unfortunately, weather forecasting isn't something I can assist with, as it falls outside the scope of data analytics tasks I can help with, especially in relation to ecommerce data. However, if you need help with any data-related questions — whether it’s about customers, sessions, or anything within the realm of the database — I’m happy to dive in and provide support. Let me know how I can assist you with your tasks moving forward!"
  }
]

In [122]:
golden_df = pd.DataFrame(golden_set)

In [123]:
golden_df.shape

(10, 3)

In [124]:
def get_answer(question):
    """Send one question to the data agent and return the text of its final reply."""
    agent_state = data_agent.invoke({"messages": [HumanMessage(content=question)]})
    # the last message in the returned conversation is the agent's answer
    final_message = agent_state['messages'][-1]
    return final_message.content

In [126]:
import tqdm

In [127]:
# ask the agent every golden-set question and store its reply on the record itself
for rec in tqdm.tqdm(golden_set):
    rec.update(llm_answer = get_answer(rec['question']))

100%|███████████████████████████████████████████| 10/10 [02:09<00:00, 12.96s/it]


In [128]:
golden_set

[{'question': 'How many customers made purchase in December 2024?',
  'sql_query': "select uniqExact(user_id) as customers from ecommerce.sessions where (toStartOfMonth(action_date) = '2024-12-01') and (revenue > 0) format TabSeparatedWithNames",
  'sot_answer': 'Thank you for your question! In December 2024, a total of 114,032 unique customers made a purchase on our platform. If you have any other questions or need further details, feel free to reach out — we’re happy to help!',
  'llm_answer': 'There were 114,032 customers who made a purchase in December 2024.'},
 {'question': 'Combien de clients ont effectué un achat en décembre 2024?',
  'sql_query': "select uniqExact(user_id) as customers from ecommerce.sessions where (toStartOfMonth(action_date) = '2024-12-01') and (revenue > 0) format TabSeparatedWithNames",
  'sot_answer': 'Thank you for your question! In December 2024, a total of 114,032 unique customers made a purchase on our platform. If you have any other questions or need 

In [130]:
import json
# persist the golden set (including any collected llm_answer fields) as JSON
with open('golden_set.json', 'w') as out_file:
    json.dump(golden_set, out_file)