In [1]:
#!python3
import os
import requests
import warnings
import io
import pandas as pd
from crewai import Agent, Task, Crew

warnings.filterwarnings('ignore')

### Setting up Llama 3

In [3]:
os.environ["OPENAI_API_BASE"]='http://localhost:11434/v1'
os.environ["OPENAI_MODEL_NAME"]='crewai-llama3'  # Adjust based on available model
os.environ["OPENAI_API_KEY"] = "NA"

### Answering questions

In [50]:
from crewai_tools import DirectoryReadTool, FileReadTool

documentation_directory_tool = DirectoryReadTool(
    directory = '/Users/marie/Documents/github/miptgirl_medium/crewai_answering_questions/ecommerce_documentation')

base_file_read_tool = FileReadTool()

In [51]:
from crewai_tools import BaseTool

class FileReadToolUPD(BaseTool):
    name: str = "Read a file's content"
    description: str = "A tool that can be used to read a file's content."

    def _run(self, file_path: str) -> str:
        # Implementation goes here
        return base_file_read_tool._run(file_path = file_path.strip('"').strip("'"))
        
file_read_tool = FileReadToolUPD()

In [76]:
data_support_agent = Agent(
    role = "Senior Data Support Agent",
	goal = "Be the most helpful support for you colleagues",
	backstory = '''
    You work as a support for data-related questions in the company. 
    Even though you're a big expert in our data warehouse, you double check all the facts in documentation. 
    Our documentation is absolutely up-to-date, so you can fully rely on it when answering questions (you don't need to check the actual data in database).
    Your work is very important for the team success. However, remember that examples of table rows don't show all the possible values. 
    You need to ensure that you provide the best possible support: answering all the questions, making no assumptions 
    and sharing only the factual data.
    Be creative try your best to solve the customer problem. 
    ''',
	allow_delegation=False,
	verbose=True
)

In [77]:
qa_support_agent = Agent(
    role = "Support Quality Assurance Agent",
    goal = "Ensure the highest quality of the answers we provide to the customers",
    backstory = '''
    You work as a Quality Assurance specialist, checking the work from  support agents and ensuring that it's inline with our highest standards.
    You need to check that the agent provides the full complete answers and make no assumptions. 
    Also, you need to make sure that the documentation addresses all the questions and is easy to understand.
    ''',
    allow_delegation=False,
    verbose=True
)

In [80]:
draft_data_answer = Task(
    description = '''Very important customer {customer} reached out to you with the following question:
    ```
    {question}
    ```
    
    Your task is to provide the best answer to all the points in the question using all available information and not making any assumprions. 
    If you don't have enough information to answer the question, just say that you don't know.''',
    expected_output = '''The detailed informative answer to the customer's question that addresses all the point mentioned. 
    Make sure that answer is complete and stict to facts (without any additional information not based on the factual data)''',
    tools = [documentation_directory_tool, file_read_tool], 
    agent = data_support_agent
)

In [75]:
answer_review = Task(
    description = '''
    Review the draft answer provided by the support agent.
    Ensure that the it fully answers all the questions mentioned in the initial inquiry. 
    Make sure that the answer is consistent and doesn't include any assumptions.
    ''',
    expected_output = '''
    The final version of the answer in markdown format that can be shared with the customer. 
    The answer should fully address all the questions, be consistent and follow our professional but informal tone of voice. 
    We are very chill and friendly company, so don't forget to include all the polite phrases.
    ''',
    tools = [], 
    agent = qa_support_agent
)

In [66]:
qna_crew = Crew(
    agents=[data_support_agent, qa_support_agent],
    tasks=[draft_data_answer,  answer_review],
    verbose=3,
    memory = False # don't work with Llama
)



In [69]:
result = qna_crew.kickoff(
    {'customer': "Max", 
     'question': """Hey team, I hope you're doing well. I need to find the numbers before our CEO presentation tomorrow, so I will really appreciate your help.
      I need to calculate the number of sessions from our Windows users in 2023. I've tried to find the table with such data in our data warehouse, but wasn't able to. 
      Do you have any ideas whether we store the needed data somewhere, so that I can query it. """
    }
)

[1m[95m [DEBUG]: == Working Agent: Senior Documentation Support Agent[00m
[1m[95m [INFO]: == Starting Task: Very important Max reached out to you with the following question:
    ```
    Hey team, I hope you're doing well. I need to find the numbers before our CEO presentation tomorrow, so I will really appreciate your help.
      I need to calculate the number of sessions from our Windows users in 2023. I've tried to find the table with such data in our data warehouse, but wasn't able to. 
      Do you have any ideas whether we store the needed data somewhere, so that I can query it. 
    ```
    
    Your task is to provide the best answer to all the points in the question using all available information and not making any assumprions. 
    If you don't have enough information to answer the question, just say that you don't know.[00m


[1m> Entering new CrewAgentExecutor chain...[0m
[32;1m[1;3mI think I need to gather more information about the data warehouse to provide an 

In [70]:
print(result)

**Hello Max,**

Thank you for reaching out with your question! I'm happy to help you find the number of sessions from Windows users in 2023. After reviewing our documentation, I found that we do store data related to sessions and users in our ecommerce database, specifically in the `ecommerce_db.sessions` table.

To answer your question, I can provide you with a step-by-step guide on how to query this table using SQL. First, you can use the `session_id` column along with the `os` column filtering for "Windows" and the `action_date` column filtering for dates in 2023. Then, you can group the results by `os` using the `GROUP BY` clause to count the number of sessions that meet these conditions.

Here's a sample SQL query that should give you the desired output:
```sql
SELECT COUNT(*) 
FROM ecommerce_db.sessions 
WHERE os = 'Windows' 
AND action_date BETWEEN '2023-01-01' AND '2023-12-31'
GROUP BY os;
```
This query will return the total number of sessions from Windows users in 2023. I hop

In [88]:
ch_support_agent = Agent(
    role = "Senior ClickHouse Support Agent",
	goal = "Be the most helpful support for you colleagues",
	backstory = '''
    You work as a support for questions about ClickHouse database in the company. 
    Even though you're a big expert, you double check all the facts in documentation. 
    Our documentation is absolutely up-to-date, so you can fully rely on it when answering questions.
    Your work is very important for the team success.
    You need to ensure that you provide the best possible support: answering all the questions, making no assumptions 
    and sharing only the factual data.
    Be creative try your best to solve the customer problem. 
    ''',
	allow_delegation=False,
	verbose=True
)

In [104]:
# ! pip install sentence-transformers

In [129]:
from crewai_tools import ScrapeWebsiteTool, WebsiteSearchTool
# ch_documenation_tool = WebsiteSearchTool(
#     'https://medium.com/towardsdev/clickhouse-tips-tricks-i-wish-i-knew-f575f0371cd3',
#     config=dict(
#         embedder=dict(
#             provider="huggingface", 
#             config=dict(
#                 model="mixedbread-ai/mxbai-embed-large-v1", # https://huggingface.co/mixedbread-ai/mxbai-embed-large-v1
#             ),
#         ),
#     )
# )

ch_documenation_tool = ScrapeWebsiteTool('https://clickhouse.com/docs/en/guides/creating-tables')

In [130]:
# ch_documenation_tool._run(search_query = 'uniq aggregation')

In [131]:
draft_ch_answer = Task(
    description = '''Very important customer {customer} reached out to you with the following question:
    ```
    {question}
    ```
    
    Your task is to provide the best answer to all the points in the question using all available information and not making any assumprions. 
    If you don't have enough information to answer the question, just say that you don't know.''',
    expected_output = '''The detailed informative answer to the customer's question that addresses all the point mentioned. 
    Make sure that answer is complete and stict to facts (without any additional information not based on the factual data)''',
    tools = [ch_documenation_tool], 
    agent = data_support_agent
)

In [132]:
complext_qna_crew = Crew(
    agents=[ch_support_agent, qa_support_agent],
    tasks=[draft_ch_answer,  answer_review],
    verbose=3,
    memory = False # don't work with Llama
)



In [133]:
result = complext_qna_crew.kickoff(
    {'customer': "Maria", 
     'question': """Good morning, team. I'm a newbie with ClickHouse. 
     Could you please explain how to create a table or share the valid documentation?"""
    }
)

[1m[95m [DEBUG]: == Working Agent: Senior Data Support Agent[00m
[1m[95m [INFO]: == Starting Task: Very important customer Maria reached out to you with the following question:
    ```
    Good morning, team. I'm a newbie with ClickHouse. 
     Could you please explain how to create a table?
    ```
    
    Your task is to provide the best answer to all the points in the question using all available information and not making any assumprions. 
    If you don't have enough information to answer the question, just say that you don't know.[00m


[1m> Entering new CrewAgentExecutor chain...[0m
[32;1m[1;3mAction: Read website content
Action Input: {"path": "https://clickhouse.com/docs/en/guides/creating-tables[0m[95m 

Creating Tables in ClickHouse | ClickHouse Docs
Skip to main contentProductResourcesUse casesPricingContact Us35.0kSign inGet startedDocsCloudSQL ReferenceKnowledge Base
EnglishRussianChinesedark ModeSearchGet StartedWhat is ClickHouse?Quick StartAdvanced Tutoria

In [134]:
print(result)

**Creating a Table in ClickHouse: A Step-by-Step Guide**

Hi Maria! Thank you for reaching out to us about creating a new table in ClickHouse. I'd be happy to walk you through the process step by step.

Before we dive in, let's start with the basics. Like most databases, ClickHouse logically groups tables into databases using the `CREATE DATABASE` command. To create a new database, use the following SQL code:

```sql
CREATE DATABASE IF NOT EXISTS helloworld
```

Once you have created your database (or selected an existing one), you can move on to creating a table! Use the `CREATE TABLE` command to define your new table.

For example, let's say you want to create a table named `my_first_table` in the `helloworld` database. Here's how you would do it:

```sql
CREATE TABLE helloworld.my_first_table (
    user_id UInt32,
    message String,
    timestamp DateTime,
    metric Float32
) ENGINE = MergeTree()
PRIMARY KEY (user_id, timestamp)
```

In this example, we've created a table called `