In [1]:
import os
from aita.datasource.snowflake import SnowflakeDataSource
from aita.datasource.postgresql import PostgreSqlDataSource
from aita.agent.base import AitaAgent
from aita.agent.sql import SqlAgent
from aita.agent.pandas import PandasAgent
from aita.agent.python import PythonAgent

In [2]:
# Baseline: a plain chat agent built from just a model name (no data source is
# passed in), asked the same question the data-aware agents receive below.
general_prompt = "I want to get the top 5 customers which making the most purchases"
aita_agent = AitaAgent("gpt-3.5-turbo")
aita_agent.chat(general_prompt)

To get the top 5 customers who are making the most purchases, you can follow these steps:

1. Analyze your sales data to determine the total number of purchases made by each customer.
2. Rank the customers based on the total number of purchases they have made.
3. Select the top 5 customers with the highest number of purchases.

You can use a spreadsheet program like Microsoft Excel or Google Sheets to organize and analyze your sales data. Create a table with columns for customer names and the number of purchases they have made. Sort the data by the number of purchases in descending order to identify the top customers.

Alternatively, if you have a customer relationship management (CRM) system or sales software, you can generate a report that shows the top customers based on the number of purchases they have made. Look for features like customer analytics or sales reports that can provide this information.

By identifying the top 5 customers who are making the most purchases, you can fo

In [3]:
# Snowflake connection settings are read from the environment so that no
# credentials are stored in the notebook. Each value is None when the
# corresponding variable is not set.
SNOWFLAKE_USER = os.getenv("SNOWFLAKE_USER")
SNOWFLAKE_PASSWORD = os.getenv("SNOWFLAKE_PASSWORD")
SNOWFLAKE_ACCOUNT = os.getenv("SNOWFLAKE_ACCOUNT")
SNOWFLAKE_WAREHOUSE = os.getenv("SNOWFLAKE_WAREHOUSE")
SNOWFLAKE_DATABASE = os.getenv("SNOWFLAKE_DATABASE")
SNOWFLAKE_SCHEMA = os.getenv("SNOWFLAKE_SCHEMA")
SNOWFLAKE_ROLE = os.getenv("SNOWFLAKE_ROLE")

In [4]:
# Snowflake data source used by the SQL, Pandas and Python agents below.
# All arguments are passed by keyword, so their order is not significant.
sf_datasource = SnowflakeDataSource(
    # Where to connect
    account=SNOWFLAKE_ACCOUNT,
    warehouse=SNOWFLAKE_WAREHOUSE,
    database=SNOWFLAKE_DATABASE,
    schema=SNOWFLAKE_SCHEMA,
    # Who connects
    user=SNOWFLAKE_USER,
    password=SNOWFLAKE_PASSWORD,
    role=SNOWFLAKE_ROLE,
)

In [5]:
# PostgreSQL data source.
# The connection URL can be overridden with the AITA_POSTGRES_URL environment
# variable, in line with how the Snowflake settings are supplied. When that
# variable is unset or empty, the original local default is used, so existing
# setups behave exactly as before.
PG_CONNECTION_URL = (
    os.environ.get("AITA_POSTGRES_URL") or "postgresql://@localhost:5432/aita"
)
pg_datasource = PostgreSqlDataSource(connection_url=PG_CONNECTION_URL)

In [6]:
# SQL agent walkthrough: build an agent on top of the Snowflake data source,
# ask a question, and print every item produced by the chat call.
sql_prompt = "I want to get the top 5 customers which making the most purchases"
sql_agent = SqlAgent(sf_datasource, "gpt-3.5-turbo", allow_extract_metadata=True)

for event in sql_agent.chat(sql_prompt):
    print(event)


I want to get the top 5 customers which making the most purchases
Tool Calls:
  sql_database_query (call_hC4yPIMskrpcYXgQnEZbYez3)
 Call ID: call_hC4yPIMskrpcYXgQnEZbYez3
  Args:
    query: SELECT C_NAME, SUM(O_TOTALPRICE) AS TOTAL_PURCHASES FROM CUSTOMER JOIN ORDERS ON C_CUSTKEY = O_CUSTKEY GROUP BY C_NAME ORDER BY TOTAL_PURCHASES DESC LIMIT 5;


<generator object Pregel.stream at 0x16721d490>

In [7]:
# Call chat again without a new message, this time permitting tool execution,
# and print whatever the call returns.
sql_tool_run = sql_agent.chat(allow_run_tool=True)
print(sql_tool_run)

Name: sql_database_query

[('Customer#000143500', Decimal('7012696.48')), ('Customer#000095257', Decimal('6563511.23')), ('Customer#000087115', Decimal('6457526.26')), ('Customer#000131113', Decimal('6311428.86')), ('Customer#000103834', Decimal('6306524.23'))]

The top 5 customers who made the most purchases are:
1. Customer#000143500 - Total Purchases: $7,012,696.48
2. Customer#000095257 - Total Purchases: $6,563,511.23
3. Customer#000087115 - Total Purchases: $6,457,526.26
4. Customer#000131113 - Total Purchases: $6,311,428.86
5. Customer#000103834 - Total Purchases: $6,306,524.23


<generator object Pregel.stream at 0x16672f3b0>

In [7]:
# A raw SQL statement can be sent as the chat message in place of a
# natural-language question.
sample_sql_query = """
SELECT c_custkey, c_name, SUM(o_totalprice) AS total_purchase
FROM snowflake_sample_data.tpch_sf1.customer
JOIN snowflake_sample_data.tpch_sf1.orders
ON c_custkey = o_custkey
GROUP BY c_custkey, c_name
ORDER BY total_purchase
DESC LIMIT 10
"""

direct_query_reply = sql_agent.chat(sample_sql_query)
print(direct_query_reply)



SELECT c_custkey, c_name, SUM(o_totalprice) AS total_purchase
FROM snowflake_sample_data.tpch_sf1.customer
JOIN snowflake_sample_data.tpch_sf1.orders
ON c_custkey = o_custkey
GROUP BY c_custkey, c_name
ORDER BY total_purchase
DESC LIMIT 10


The SQL query you provided retrieves the top 10 customers based on their total purchases. Here are the results:

| c_custkey | c_name           | total_purchase |
|-----------|------------------|----------------|
| 143500    | Customer#000143500 | 7012696.48    |
| 95257     | Customer#000095257 | 6563511.23    |
| 87115     | Customer#000087115 | 6457526.26    |
| 131113    | Customer#000131113 | 6311428.86    |
| 103834    | Customer#000103834 | 6306524.23    |
| 135866    | Customer#000135866 | 6203588.38    |
| 4701      | Customer#000004701 | 6198974.53    |
| 121827    | Customer#000121827 | 6145873.12    |
| 96919     | Customer#000096919 | 6090547.83    |
| 15531     | Customer#000015531 | 6053292.63    | 

These are the top 10 customers 

In [4]:
# Pandas agent walkthrough: same Snowflake data source, but the agent answers
# through its pandas analysis tool (see the tool call in the output).
pandas_prompt = "I want to get the top customers which making the most purchases"
pandas_agent = PandasAgent(sf_datasource, "gpt-3.5-turbo")
pandas_agent.chat(pandas_prompt)


I want to get the top customers which making the most purchases
{'messages': [HumanMessage(content='I want to get the top customers which making the most purchases', id='f33ae5ad-6576-4544-b362-07abf55acee6')], 'user_info': None}
Tool Calls:
  pandas_analysis_tool (call_T5KmzRKvw2mLDYnUE8VZILU5)
 Call ID: call_T5KmzRKvw2mLDYnUE8VZILU5
  Args:
    script: 
import pandas as pd

# Assume 'datasource' is the data source containing the tables

# Query to get the top customers making the most purchases
query = 'SELECT C_NAME, COUNT(*) AS PURCHASE_COUNT FROM CUSTOMER JOIN ORDERS ON C_CUSTKEY = O_CUSTKEY GROUP BY C_NAME ORDER BY PURCHASE_COUNT DESC'

dataframe = datasource.to_pandas(query)
dataframe.head()


<generator object Pregel.stream at 0x14a050610>

In [5]:
# Call chat again without a new message, this time permitting tool execution.
# The returned value stays the cell's last expression so it is still displayed.
pandas_tool_run = pandas_agent.chat(allow_run_tool=True)
pandas_tool_run

Unnamed: 0,C_NAME,PURCHASE_COUNT
0,Customer#000003451,41
1,Customer#000102004,41
2,Customer#000102022,41
3,Customer#000117082,40
4,Customer#000079300,40


Name: pandas_analysis_tool

<ExecutionResult object at 14f9e61b0, execution_count=None error_before_exec=None error_in_exec=None info=<ExecutionInfo object at 14f9e6210, raw_cell="
import pandas as pd

# Assume 'datasource' is the.." store_history=False silent=False shell_futures=True cell_id=None> result=               C_NAME PURCHASE_COUNT
0  Customer#000003451             41
1  Customer#000102004             41
2  Customer#000102022             41
3  Customer#000117082             40
4  Customer#000079300             40>
{'messages': [HumanMessage(content='I want to get the top customers which making the most purchases', id='f33ae5ad-6576-4544-b362-07abf55acee6'), AIMessage(content='', additional_kwargs={'tool_calls': [{'function': {'arguments': '{"script":"\\nimport pandas as pd\\n\\n# Assume \'datasource\' is the data source containing the tables\\n\\n# Query to get the top customers making the most purchases\\nquery = \'SELECT C_NAME, COUNT(*) AS PURCHASE_COUNT FROM CUSTOMER JOIN

<generator object Pregel.stream at 0x14a0781d0>

In [None]:
# Hand-written tool invocation for the pandas analysis tool.
#
# "name" selects the tool and "arguments.script" holds the Python source to run.
# The script is kept as a multi-line literal with every statement at column 0:
# a stray leading space before the first statement makes it an
# "unexpected indent" error when the text is compiled as ordinary Python.
#
# NOTE(review): the script refers to a `datasource` object that is not defined
# in this cell; presumably the tool injects it at execution time. Confirm.
# NOTE(review): both tables are fetched with SELECT * and merged in pandas,
# which may be heavy on large tables.
tool_spec = {
    "name": "pandas_analysis_tool",
    "arguments": {
        "script": """
import pandas as pd

# Define the data sources
orders_data = datasource.to_pandas('SELECT * FROM ORDERS')
customer_data = datasource.to_pandas('SELECT * FROM CUSTOMER')

# Join the ORDERS and CUSTOMER tables
merged_data = pd.merge(orders_data, customer_data, left_on='O_CUSTKEY', right_on='C_CUSTKEY')

# Group by customer and calculate total amount spent
customer_total_spent = merged_data.groupby('C_NAME')['O_TOTALPRICE'].sum().reset_index()

# Sort customers based on total amount spent
top_customers = customer_total_spent.sort_values(by='O_TOTALPRICE', ascending=False)

# Display the top customers
top_customers.head()
""",
    },
}
# Run the hand-written tool specification directly through the pandas agent;
# the returned value stays the cell's last expression so it is still displayed.
manual_tool_result = pandas_agent.run_tool(tool_spec)
manual_tool_result

In [None]:
# Python agent walkthrough: same Snowflake data source, with tool execution
# allowed in the same chat call.
python_prompt = "python code to show the customers data with snowflake database as data source"
python_agent = PythonAgent(sf_datasource, "gpt-3.5-turbo")
python_agent.chat(python_prompt, allow_run_tool=True)