In [1]:
# Development helper: re-import ryoma_ai.agent.sql so that edits made to the
# module's source are picked up without restarting the kernel.
import importlib
# import ryoma_ai.agent.chat_agent
import ryoma_ai.agent.sql
# Uncomment the two chat_agent lines to reload that module as well.
# importlib.reload(ryoma_ai.agent.chat_agent)
# reload() returns the module object; as the last expression it is echoed as the cell output.
importlib.reload(ryoma_ai.agent.sql)

<module 'ryoma_ai.agent.sql' from '/Users/haoxu/dev/genaiz4/ryoma/packages/ryoma_ai/ryoma_ai/agent/sql.py'>

In [2]:
from ryoma_ai.agent.chat_agent import ChatAgent


## Ensure the OpenAI API key is set in the environment

In [21]:
# Create a simple ryoma ChatAgent using the gpt-3.5-turbo model.
ryoma_agent = ChatAgent("gpt-3.5-turbo")
# No context prompt or data source is attached in this cell. stream() returns a
# generator, which is echoed as the cell's last expression.
ryoma_agent.stream("I want to get the top 5 customers which making the most purchases")

Great! How can I assist you today with data science, analysis, or data engineering?

<generator object RunnableSequence.stream at 0x10ea87c40>

In [2]:
# Example of supplying a custom context prompt: the schema name is handed to
# the agent through set_context_prompt() before the question is asked.
ryoma_agent = (
    ChatAgent("gpt-3.5-turbo")
    .set_context_prompt("Data Schema: snowflake_sample_data.tpch_sf1")
)
ryoma_agent.stream("I want to get the top 5 customers which making the most purchases")

To get the top 5 customers who have made the most purchases from the `snowflake_sample_data.tpch_sf1` dataset, you can run a SQL query similar to the following:

```sql
SELECT c.c_custkey, c.c_name, COUNT(o.o_orderkey) AS total_orders
FROM snowflake_sample_data.tpch_sf1.customer c
JOIN snowflake_sample_data.tpch_sf1.orders o ON c.c_custkey = o.o_custkey
GROUP BY c.c_custkey, c.c_name
ORDER BY total_orders DESC
LIMIT 5;
```

In this query:
- We are selecting the customer key (`c_custkey`) and customer name (`c_name`) from the `customer` table, and counting the number of orders for each customer by joining the `orders` table on the `c_custkey` and `o_custkey`.
- We are grouping the results by customer key and name.
- We are then ordering the results in descending order based on the total number of orders.
- Finally, we are limiting the output to the top 5 customers.

Please adjust the column names and table aliases based on the actual schema of your dataset if they are different from the

<generator object RunnableSequence.stream at 0x1687a5120>

## SqlAgent example

In [3]:
import os
from ryoma_ai.agent.chat_agent import ChatAgent
from ryoma_ai.datasource.snowflake import SnowflakeDataSource

# Snowflake connection settings, read from environment variables.
# Each value is None when the corresponding variable is not set.
SNOWFLAKE_USER = os.getenv("SNOWFLAKE_USER")
SNOWFLAKE_PASSWORD = os.getenv("SNOWFLAKE_PASSWORD")
SNOWFLAKE_ACCOUNT = os.getenv("SNOWFLAKE_ACCOUNT")
SNOWFLAKE_WAREHOUSE = os.getenv("SNOWFLAKE_WAREHOUSE")
SNOWFLAKE_DATABASE = os.getenv("SNOWFLAKE_DATABASE")
SNOWFLAKE_SCHEMA = os.getenv("SNOWFLAKE_SCHEMA")
SNOWFLAKE_ROLE = os.getenv("SNOWFLAKE_ROLE")

In [4]:
# Build the Snowflake data source from the environment-provided settings.
# The role is taken from SNOWFLAKE_ROLE instead of a hard-coded "ACCOUNTADMIN":
# the variable was already read from the environment but never used, and
# pinning the notebook to the account-administrator role grants far more
# privilege than running read-only example queries requires.
# NOTE(review): if SNOWFLAKE_ROLE is unset this passes None, like the other
# settings — confirm SnowflakeDataSource accepts a missing role.
sf_datasource = SnowflakeDataSource(
    user=SNOWFLAKE_USER,
    password=SNOWFLAKE_PASSWORD,
    account=SNOWFLAKE_ACCOUNT,
    warehouse=SNOWFLAKE_WAREHOUSE,
    database=SNOWFLAKE_DATABASE,
    db_schema=SNOWFLAKE_SCHEMA,
    role=SNOWFLAKE_ROLE,
)

In [4]:
# Example of a plain ChatAgent with the Snowflake data source attached.
# In this setup no data catalog is provided to the agent.
ryoma_agent = ChatAgent("gpt-3.5-turbo").add_datasource(sf_datasource)

In [25]:
# Ask the same question again, this time passing display=True to stream().
# NOTE(review): display=True presumably prints the streamed response inline —
# confirm against ChatAgent.stream.
ryoma_agent.stream(
    "I want to get the top 5 customers which making the most purchases", display=True
)

Hello! How can I assist you today with your data science, analysis, or data engineering needs?

<generator object RunnableSequence.stream at 0x16852d990>

In [5]:
# Basic example of the SQL agent. Unlike the ChatAgent example, the data
# catalog is provided to the agent here.
from ryoma_ai.agent.sql import SqlAgent
from ryoma_ai.prompt.base import BasicContextPromptTemplate
from ryoma_ai.agent.workflow import ToolMode

# Build the SQL agent on top of the Snowflake data source.
sql_agent = SqlAgent("gpt-3.5-turbo").add_datasource(sf_datasource)

In [6]:
# Ask the SQL agent the question. The recorded output for this cell shows the
# agent proposing a sql_database_query tool call, with no tool result yet.
sql_agent.stream(
    "I want to get the top 5 customers which making the most purchases", display=True
)


I want to get the top 5 customers which making the most purchases
Tool Calls:
  sql_database_query (call_XoXUiEatihTnMtQKKuIDqKXm)
 Call ID: call_XoXUiEatihTnMtQKKuIDqKXm
  Args:
    query: SELECT customer_id, SUM(total_price) as total_purchases FROM purchases GROUP BY customer_id ORDER BY total_purchases DESC LIMIT 5


<generator object Pregel.stream at 0x17796c950>

In [7]:
# Continue the stream without a new question, passing tool_mode=ToolMode.ONCE.
# The recorded output shows the previously proposed sql_database_query call
# being executed at this point.
# NOTE(review): presumably ToolMode.ONCE permits a single tool execution —
# confirm against ryoma_ai.agent.workflow.ToolMode.
sql_agent.stream(tool_mode=ToolMode.ONCE)


I want to get the top 5 customers which making the most purchases
Tool Calls:
  sql_database_query (call_XoXUiEatihTnMtQKKuIDqKXm)
 Call ID: call_XoXUiEatihTnMtQKKuIDqKXm
  Args:
    query: SELECT customer_id, SUM(total_price) as total_purchases FROM purchases GROUP BY customer_id ORDER BY total_purchases DESC LIMIT 5
Name: sql_database_query

Received an error while executing the query: Failed to connect to Snowflake: 250001 (08001): Failed to connect to DB: <redacted-account>.snowflakecomputing.com:443. Programmatic access token is expired.

It seems there was an issue with accessing the database due to an expired programmatic access token. To resolve this, I will retry the query after refreshing the access token.
Tool Calls:
  query_profile (call_ylYFYPm2ivmmMXkUHQ2ndwBR)
 Call ID: call_ylYFYPm2ivmmMXkUHQ2ndwBR
  Args:
    query: SELECT customer_id, SUM(total_price) as total_purchases FROM purchases GROUP BY customer_id ORDER BY total_purchases DESC LIMIT 5
  query_profile (call_PczR

<generator object Pregel.stream at 0x17796d090>

In [11]:
# Example of using the SQL agent to run a SQL query directly: the hand-written
# query text is passed to stream() in place of a natural-language question.
# The query sums o_totalprice per customer (customer joined to orders on the
# customer key) and keeps the 10 largest totals.
sample_sql_query = """
SELECT c_custkey, c_name, SUM(o_totalprice) AS total_purchase
FROM snowflake_sample_data.tpch_sf1.customer
JOIN snowflake_sample_data.tpch_sf1.orders
ON c_custkey = o_custkey
GROUP BY c_custkey, c_name
ORDER BY total_purchase
DESC LIMIT 10
"""

sql_agent.stream(sample_sql_query)



SELECT c_custkey, c_name, SUM(o_totalprice) AS total_purchase
FROM snowflake_sample_data.tpch_sf1.customer
JOIN snowflake_sample_data.tpch_sf1.orders
ON c_custkey = o_custkey
GROUP BY c_custkey, c_name
ORDER BY total_purchase
DESC LIMIT 10


It seems you have run the query again. The top 5 customers with the most purchases are:

1. Customer#000143500 - Total Purchase: $7,012,696.48
2. Customer#000095257 - Total Purchase: $6,563,511.23
3. Customer#000087115 - Total Purchase: $6,457,526.26
4. Customer#000131113 - Total Purchase: $6,311,428.86
5. Customer#000103834 - Total Purchase: $6,306,524.23

These customers have the highest total purchase amounts.


<generator object Pregel.stream at 0x1379dd820>