In [26]:
#!python3
import os
import requests
import warnings
import io
import pandas as pd
from crewai import Agent, Task, Crew

# Silence every warning raised after this point so the cell outputs stay readable.
# NOTE(review): this also hides deprecation warnings from the libraries imported above.
warnings.filterwarnings('ignore')

In [27]:
CH_HOST = 'http://localhost:8123'  # host that queries are sent to unless overridden


def get_clickhouse_data(query, host = CH_HOST, connection_timeout = 1500):
    """Send `query` to the database over HTTP and return the response as text.

    Args:
        query: SQL text. One layer of surrounding double quotes and then
            single quotes is stripped before the query is sent.
        host: URL the POST request is sent to (defaults to `CH_HOST`).
        connection_timeout: value forwarded to `requests.post` as `timeout`.

    Returns:
        The response body when the status code is 200; otherwise a string
        starting with 'Database returned the following error:' followed by
        the response body, so the caller always receives text.
    """
    cleaned_query = query.strip('"').strip("'")
    response = requests.post(
        host,
        params={'query': cleaned_query},
        timeout=connection_timeout,
    )

    # Anything other than 200 is reported back as text instead of raising.
    if response.status_code != 200:
        return 'Database returned the following error:\n' + response.text
    return response.text

### Setting up OpenAI

In [28]:
import json
# Load settings from a JSON file located two directories above the notebook.
with open('../../config.json') as f:
    config = json.load(f)

# Export the model name and the API key from the config as environment variables.
os.environ["OPENAI_MODEL_NAME"] = 'gpt-4o'
os.environ["OPENAI_API_KEY"] = config['OPENAI_API_KEY']

### Answering questions

In [29]:
from crewai_tools import DirectoryReadTool, FileReadTool

# NOTE(review): absolute, machine-specific path — adjust it when running elsewhere.
DOCUMENTATION_DIRECTORY = '/Users/marie/Documents/github/miptgirl_medium/crewai_answering_questions/ecommerce_documentation'

# Tool configured with the documentation folder.
documentation_directory_tool = DirectoryReadTool(directory=DOCUMENTATION_DIRECTORY)

# Plain file reader; FileReadToolUPD delegates to it after cleaning the path.
base_file_read_tool = FileReadTool()

In [30]:
from crewai_tools import BaseTool

class FileReadToolUPD(BaseTool):
    """File-reading tool that removes surrounding quotes from the path it is given.

    The actual read is delegated to `base_file_read_tool`.
    """

    name: str = "Read a file's content"
    description: str = "A tool that can be used to read a file's content."

    def _run(self, file_path: str) -> str:
        """Return the content of `file_path` after stripping wrapping quotes.

        Surrounding double quotes are stripped first, then single quotes,
        before the path is handed to `base_file_read_tool._run`.
        """
        unquoted_path = file_path.strip('"').strip("'")
        return base_file_read_tool._run(file_path = unquoted_path)


file_read_tool = FileReadToolUPD()

In [31]:
# Support agent for data-related questions; its backstory tells it to rely on
# the documentation rather than on the actual data in the database.
data_support_agent = Agent(
    role="Senior Data Support Agent",
    goal="Be the most helpful support for you colleagues",
    backstory='''
    You work as a support for data-related questions in the company. 
    Even though you're a big expert in our data warehouse, you double check all the facts in documentation. 
    Our documentation is absolutely up-to-date, so you can fully rely on it when answering questions (you don't need to check the actual data in database).
    Your work is very important for the team success. However, remember that examples of table rows don't show all the possible values. 
    You need to ensure that you provide the best possible support: answering all the questions, making no assumptions 
    and sharing only the factual data.
    Be creative try your best to solve the customer problem. 
    ''',
    allow_delegation=False,
    verbose=True,
)

In [32]:
# Reviewer agent: checks the support agents' drafts for completeness and for
# unsupported assumptions.
qa_support_agent = Agent(
    role="Support Quality Assurance Agent",
    goal="Ensure the highest quality of the answers we provide to the customers",
    backstory='''
    You work as a Quality Assurance specialist, checking the work from  support agents and ensuring that it's inline with our highest standards.
    You need to check that the agent provides the full complete answers and make no assumptions. 
    Also, you need to make sure that the documentation addresses all the questions and is easy to understand.
    ''',
    allow_delegation=False,
    verbose=True,
)

In [33]:
# Drafting task for data questions. `{customer}` and `{question}` are
# placeholders filled from the inputs passed to `kickoff`. The agent gets the
# documentation directory tool and the quote-stripping file reader.
draft_data_answer = Task(
    description='''Very important customer {customer} reached out to you with the following question:
    ```
    {question}
    ```
    
    Your task is to provide the best answer to all the points in the question using all available information and not making any assumprions. 
    If you don't have enough information to answer the question, just say that you don't know.''',
    expected_output='''The detailed informative answer to the customer's question that addresses all the point mentioned. 
    Make sure that answer is complete and stict to facts (without any additional information not based on the factual data)''',
    tools=[documentation_directory_tool, file_read_tool],
    agent=data_support_agent,
)

In [34]:
# Review task: the QA agent turns the draft into the final markdown answer.
# It is given no tools.
answer_review = Task(
    description='''
    Review the draft answer provided by the support agent.
    Ensure that the it fully answers all the questions mentioned in the initial inquiry. 
    Make sure that the answer is consistent and doesn't include any assumptions.
    ''',
    expected_output='''
    The final version of the answer in markdown format that can be shared with the customer. 
    The answer should fully address all the questions, be consistent and follow our professional but informal tone of voice. 
    We are very chill and friendly company, so don't forget to include all the polite phrases.
    ''',
    tools=[],
    agent=qa_support_agent,
)

In [36]:
# Support agent dedicated to questions about the ClickHouse database itself.
ch_support_agent = Agent(
    role="Senior ClickHouse Support Agent",
    goal="Be the most helpful support for you colleagues",
    backstory='''
    You work as a support for questions about ClickHouse database in the company. 
    Even though you're a big expert, you double check all the facts in documentation. 
    Our documentation is absolutely up-to-date, so you can fully rely on it when answering questions.
    Your work is very important for the team success.
    You need to ensure that you provide the best possible support: answering all the questions, making no assumptions 
    and sharing only the factual data.
    Be creative try your best to solve the customer problem. 
    ''',
    allow_delegation=False,
    verbose=True,
)

In [17]:
# ! pip install sentence-transformers

In [37]:
from crewai_tools import ScrapeWebsiteTool, WebsiteSearchTool
# Scraper bound to a single ClickHouse documentation page (the "creating tables" guide).
ch_documenation_tool = ScrapeWebsiteTool(
    website_url='https://clickhouse.com/docs/en/guides/creating-tables'
)

In [38]:
# Drafting task for ClickHouse questions. `{customer}` and `{question}` are
# placeholders filled from the inputs passed to `kickoff`. The agent is given
# the ClickHouse documentation scraping tool.
draft_ch_answer = Task(
    description = '''Very important customer {customer} reached out to you with the following question:
    ```
    {question}
    ```
    
    Your task is to provide the best answer to all the points in the question using all available information and not making any assumprions. 
    If you don't have enough information to answer the question, just say that you don't know.''',
    expected_output = '''The detailed informative answer to the customer's question that addresses all the point mentioned. 
    Make sure that answer is complete and stict to facts (without any additional information not based on the factual data)''',
    tools = [ch_documenation_tool], 
    # Fix: this task was assigned to `data_support_agent` (copy-paste from
    # `draft_data_answer`); the ClickHouse task belongs to the ClickHouse agent.
    agent = ch_support_agent
)

In [41]:
from langchain_openai import ChatOpenAI
from crewai import Process

In [42]:
# Crew combining both drafting tasks and the QA review. It runs with the
# hierarchical process, using a gpt-4o manager LLM at temperature 0.
complext_qna_crew = Crew(
    agents=[ch_support_agent, data_support_agent, qa_support_agent],
    tasks=[draft_ch_answer, draft_data_answer, answer_review],
    process=Process.hierarchical,
    manager_llm=ChatOpenAI(model='gpt-4o', temperature=0),
    verbose=3,
    memory=False,
)

In [43]:
# Run the crew on a ClickHouse question; the values fill the `{customer}` and
# `{question}` placeholders in the task descriptions.
ch_result = complext_qna_crew.kickoff(
    inputs={
        'customer': "Maria",
        'question': """Good morning, team. I'm using ClickHouse to calculate the number of customers. 
     Could you please remind whether there's an option to add totals in ClickHouse?""",
    }
)

[1m[92m [DEBUG]: Working Agent: Crew Manager[00m
[1m[92m [INFO]: Starting Task: Very important customer Maria reached out to you with the following question:
    ```
    Good morning, team. I'm using ClickHouse to calculate the number of customers. 
     Could you please remind whether there's an option to add totals in ClickHouse?
    ```
    
    Your task is to provide the best answer to all the points in the question using all available information and not making any assumprions. 
    If you don't have enough information to answer the question, just say that you don't know.[00m


[1m> Entering new CrewAgentExecutor chain...[0m
[32;1m[1;3mThought: I need to determine whether ClickHouse has an option to add totals when calculating the number of customers. To do this, I should ask the Senior ClickHouse Support Agent, as they would have the most expertise in this area.

Action: Ask question to co-worker
Action Input: {"coworker": "Senior ClickHouse Support Agent", "question":

In [44]:
# Show the crew's final answer to the ClickHouse question as plain text.
print(ch_result)

Hi Maria,

Thank you for reaching out with your questions about the `WITH TOTALS` modifier. I'm here to help clarify its functionality, provide an example of its usage, and discuss any performance considerations or limitations.

### Functionality of `WITH TOTALS`
The `WITH TOTALS` modifier in ClickHouse is used to add an extra row to the result set which contains the total values of the selected columns. This is particularly useful when you want to aggregate data and see the sum, average, or other aggregate functions without running a separate query.

### Example Usage
Here’s a simple example to illustrate how `WITH TOTALS` works:

```sql
SELECT 
    customer_name, 
    count() AS customer_count, 
    sum(purchase_amount) AS total_purchase 
FROM 
    customers 
GROUP BY 
    customer_name 
WITH TOTALS
```

In this example, the query groups the sales data by customer name and calculates the count of customers and the sum of purchase amounts for each customer. The `WITH TOTALS` modifier 

In [46]:
# Run the same crew on a data-warehouse question; the values fill the
# `{customer}` and `{question}` placeholders in the task descriptions.
doc_result = complext_qna_crew.kickoff(
    inputs={
        'customer': "Max",
        'question': """Hey team, I hope you're doing well. I need to find the numbers before our CEO presentation tomorrow, so I will really appreciate your help.
      I need to calculate the number of sessions from our Windows users in 2023. I've tried to find the table with such data in our data warehouse, but wasn't able to. 
      Do you have any ideas whether we store the needed data somewhere, so that I can query it. """,
    }
)

[1m[92m [DEBUG]: Working Agent: Crew Manager[00m
[1m[92m [INFO]: Starting Task: Very important customer Max reached out to you with the following question:
    ```
    Hey team, I hope you're doing well. I need to find the numbers before our CEO presentation tomorrow, so I will really appreciate your help.
      I need to calculate the number of sessions from our Windows users in 2023. I've tried to find the table with such data in our data warehouse, but wasn't able to. 
      Do you have any ideas whether we store the needed data somewhere, so that I can query it. 
    ```
    
    Your task is to provide the best answer to all the points in the question using all available information and not making any assumprions. 
    If you don't have enough information to answer the question, just say that you don't know.[00m


[1m> Entering new CrewAgentExecutor chain...[0m
[32;1m[1;3mI need to determine if we have the data on the number of sessions from Windows users in 2023 stored 

In [47]:
# Show the crew's final answer to the data-warehouse question as plain text.
print(doc_result)

```markdown
Hey Max,

I hope you're doing well too. I have the information you need for your CEO presentation.

We do store data on the number of sessions from Windows users in 2023 in our data warehouse. This data is located in the 'user_sessions' table, which has the following structure:
- session_id: INTEGER
- user_id: INTEGER
- session_start: TIMESTAMP
- session_end: TIMESTAMP
- device_type: VARCHAR
- os: VARCHAR
- browser: VARCHAR
- country: VARCHAR

You can query this data using the following SQL statement:

```sql
SELECT COUNT(*) AS windows_sessions_2023 
FROM user_sessions 
WHERE os = 'Windows' 
AND session_start BETWEEN '2023-01-01 00:00:00' AND '2023-12-31 23:59:59';
```

This query will filter the sessions to include only those from Windows users in the year 2023 and give you the total number of sessions.

If you have any further questions or need additional assistance, please let me know.

Best regards,  
[Your Name]
```

The response is consistent, free of assumptions, and