In [1]:
import pandas as pd

import os
from aita.agent.pandas import PandasAgent
from aita.datasource.snowflake import SnowflakeDataSource
from aita.prompt.builder import BasicPromptTemplate

In [2]:
# Snowflake connection settings are read from the environment so that no
# credentials are stored in the notebook. A variable that is not set yields None.
SNOWFLAKE_USER = os.getenv("SNOWFLAKE_USER")
SNOWFLAKE_PASSWORD = os.getenv("SNOWFLAKE_PASSWORD")
SNOWFLAKE_ACCOUNT = os.getenv("SNOWFLAKE_ACCOUNT")
SNOWFLAKE_WAREHOUSE = os.getenv("SNOWFLAKE_WAREHOUSE")
SNOWFLAKE_DATABASE = os.getenv("SNOWFLAKE_DATABASE")
SNOWFLAKE_SCHEMA = os.getenv("SNOWFLAKE_SCHEMA")
SNOWFLAKE_ROLE = os.getenv("SNOWFLAKE_ROLE")

In [3]:
# Create the Snowflake data source from the settings read from the environment.
sf_datasource = SnowflakeDataSource(
    # Login credentials.
    account=SNOWFLAKE_ACCOUNT,
    user=SNOWFLAKE_USER,
    password=SNOWFLAKE_PASSWORD,
    # Session context: role, warehouse, and the database/schema to use.
    role=SNOWFLAKE_ROLE,
    warehouse=SNOWFLAKE_WAREHOUSE,
    database=SNOWFLAKE_DATABASE,
    schema=SNOWFLAKE_SCHEMA,
)

In [4]:
# Build an agent, set its prompt template, and attach the Snowflake data source.
# "gpt-3.5-turbo" is presumably the name of the LLM the agent uses — confirm against aita docs.
pandas_agent = (
    PandasAgent("gpt-3.5-turbo")
    .set_prompt_template(BasicPromptTemplate)
    .add_datasource(sf_datasource)
)

In [5]:
# Send a natural-language request to the agent. The trailing semicolon keeps
# Jupyter from echoing the generator object that chat() returns; its repr
# contains a memory address that changes on every run and adds noise to the output.
pandas_agent.chat("get the first 2 rows of the data using pandas");


get the first 2 rows of the data using pandas
Tool Calls:
  convert_to_pandas (call_ElU6rWVvaUd5vBUKMhw03Ava)
 Call ID: call_ElU6rWVvaUd5vBUKMhw03Ava
  Args:
    query: SELECT * FROM SNOWFLAKE_SAMPLE_DATA.TPCH_SF1.ORDERS LIMIT 2


<generator object Pregel.stream at 0x17466bf00>

In [6]:
# Call chat() again without a new message but with allow_run_tool=True;
# presumably this approves the tool call proposed by the previous cell — confirm against aita docs.
# The trailing semicolon suppresses the echoed repr of the returned generator.
pandas_agent.chat(allow_run_tool=True);

Name: convert_to_pandas

  O_ORDERKEY O_CUSTKEY O_ORDERSTATUS O_TOTALPRICE O_ORDERDATE  \
0    3000001    145618             F     30175.88  1992-12-17   
1    3000002      1481             O    297999.63  1995-07-28   

   O_ORDERPRIORITY          O_CLERK O_SHIPPRIORITY  \
0  4-NOT SPECIFIED  Clerk#000000141              0   
1         1-URGENT  Clerk#000000547              0   

                                           O_COMMENT  
0  l packages. furiously careful instructions gro...  
1                      carefully unusual dependencie  

Here are the first 2 rows of the data from the "ORDERS" table:

| O_ORDERKEY | O_CUSTKEY | O_ORDERSTATUS | O_TOTALPRICE | O_ORDERDATE | O_ORDERPRIORITY | O_CLERK        | O_SHIPPRIORITY | O_COMMENT                                          |
|------------|-----------|---------------|--------------|-------------|-----------------|----------------|-----------------|---------------------------------------------------|
| 3000001    | 145618    | F    

<generator object Pregel.stream at 0x174677f20>

In [7]:
# Small in-memory customer table used to demonstrate the agent on a local
# DataFrame. Each tuple is one row: (c_custkey, c_name, total_purchase).
df = pd.DataFrame(
    [
        (1, "Alice", 100),
        (2, "Bob", 200),
        (3, "Charlie", 300),
        (4, "David", 400),
        (5, "Eve", 500),
    ],
    columns=["c_custkey", "c_name", "total_purchase"],
)

In [8]:
# Rebind pandas_agent to a fresh agent that works on the local DataFrame
# instead of the Snowflake data source.
pandas_agent = (
    PandasAgent("gpt-3.5-turbo")
    .set_prompt_template(BasicPromptTemplate)
    .add_dataframe(df)
)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 3 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   c_custkey       5 non-null      int64 
 1   c_name          5 non-null      object
 2   total_purchase  5 non-null      int64 
dtypes: int64(2), object(1)
memory usage: 252.0+ bytes


In [9]:
# Send the same natural-language request to the DataFrame-backed agent.
# The trailing semicolon keeps Jupyter from echoing the generator object that
# chat() returns, whose repr contains a run-dependent memory address.
pandas_agent.chat("get the first 2 rows of the data using pandas");


get the first 2 rows of the data using pandas
Tool Calls:
  pandas_analysis_tool (call_ehVDvzYuMoYYbUozEJZXUylH)
 Call ID: call_ehVDvzYuMoYYbUozEJZXUylH
  Args:
    script: df.head(2)


<generator object Pregel.stream at 0x16a6366c0>

In [10]:
# Call chat() again without a new message but with allow_run_tool=True;
# presumably this approves the pending pandas tool call — confirm against aita docs.
# The trailing semicolon suppresses the echoed repr of the returned generator.
pandas_agent.chat(allow_run_tool=True);

Unnamed: 0,c_custkey,c_name,total_purchase
0,1,Alice,100
1,2,Bob,200


Name: pandas_analysis_tool

<ExecutionResult object at 2d694d0d0, execution_count=None error_before_exec=None error_in_exec=None info=<ExecutionInfo object at 2d694cd70, raw_cell="df.head(2)" store_history=False silent=False shell_futures=True cell_id=None> result=   c_custkey c_name  total_purchase
0          1  Alice             100
1          2    Bob             200>

Here are the first 2 rows of the data:

|   | c_custkey | c_name | total_purchase |
|---|-----------|--------|----------------|
| 0 | 1         | Alice  | 100            |
| 1 | 2         | Bob    | 200            |


<generator object Pregel.stream at 0x1734f0f60>