In [5]:
import pandas as pd

import os
from aita.agent.pandas import PandasAgent
from aita.datasource.snowflake import SnowflakeDataSource
from aita.prompt.base import BasicDataSourcePromptTemplate

In [2]:
# Snowflake connection settings, read from the process environment.
# A variable that is not set yields None rather than raising.

# -- credentials --
SNOWFLAKE_USER = os.getenv("SNOWFLAKE_USER")
SNOWFLAKE_PASSWORD = os.getenv("SNOWFLAKE_PASSWORD")

# -- account and compute --
SNOWFLAKE_ACCOUNT = os.getenv("SNOWFLAKE_ACCOUNT")
SNOWFLAKE_WAREHOUSE = os.getenv("SNOWFLAKE_WAREHOUSE")

# -- default namespace and role --
SNOWFLAKE_DATABASE = os.getenv("SNOWFLAKE_DATABASE")
SNOWFLAKE_SCHEMA = os.getenv("SNOWFLAKE_SCHEMA")
SNOWFLAKE_ROLE = os.getenv("SNOWFLAKE_ROLE")

In [3]:
# Gather the connection settings under the keyword names the data source
# constructor is called with, then unpack them in a single call.
snowflake_connection = {
    "user": SNOWFLAKE_USER,
    "password": SNOWFLAKE_PASSWORD,
    "account": SNOWFLAKE_ACCOUNT,
    "warehouse": SNOWFLAKE_WAREHOUSE,
    "database": SNOWFLAKE_DATABASE,
    "schema": SNOWFLAKE_SCHEMA,
    "role": SNOWFLAKE_ROLE,
}
sf_datasource = SnowflakeDataSource(**snowflake_connection)

In [4]:
# Assemble the agent one step at a time. Each call's return value is rebound
# to `pandas_agent`, so the final binding is exactly what the equivalent
# chained expression would produce.
pandas_agent = PandasAgent("gpt-3.5-turbo")
pandas_agent = pandas_agent.set_prompt_template(BasicDataSourcePromptTemplate)
pandas_agent = pandas_agent.add_datasource(sf_datasource)

In [5]:
# Send a natural-language question to the Snowflake-backed agent.
# In the saved output the agent answers with a proposed `convert_to_pandas`
# tool call (a SQL query plus a Call ID); the tool does not appear to be
# executed by this call. The trailing `<generator object ...>` line is just
# the repr of the value returned by `stream`.
pandas_agent.stream("get the first 2 rows of the data using pandas")


get the first 2 rows of the data using pandas
Tool Calls:
  convert_to_pandas (call_UXwZeNfcizgHWYW7hgWr44ud)
 Call ID: call_UXwZeNfcizgHWYW7hgWr44ud
  Args:
    query: SELECT * FROM SNOWFLAKE_SAMPLE_DATA.TPCH_SF1.ORDERS LIMIT 2;


<generator object Pregel.stream at 0x12fb1a340>

In [6]:
# Manually run the `convert_to_pandas` tool call that the agent proposed, and
# keep its result in `df`.
# NOTE(review): `tool_id` is copied by hand from the "Call ID" printed in the
# previous cell's saved output. It presumably changes on every fresh run, so
# this cell will not survive Restart & Run All as written — TODO: obtain the
# pending call ID programmatically (confirm what the agent API offers).
df = pandas_agent.call_tool("convert_to_pandas", tool_id="call_UXwZeNfcizgHWYW7hgWr44ud")

In [7]:
# Preview the first rows of the tool-call result; the saved output shows the
# two ORDERS rows selected by the query.
# NOTE(review): a later cell rebinds `df` to a hand-built frame, so this cell
# must run before that one to show the Snowflake result.
df.head()

Unnamed: 0,O_ORDERKEY,O_CUSTKEY,O_ORDERSTATUS,O_TOTALPRICE,O_ORDERDATE,O_ORDERPRIORITY,O_CLERK,O_SHIPPRIORITY,O_COMMENT
0,3000001,145618,F,30175.88,1992-12-17,4-NOT SPECIFIED,Clerk#000000141,0,l packages. furiously careful instructions gro...
1,3000002,1481,O,297999.63,1995-07-28,1-URGENT,Clerk#000000547,0,carefully unusual dependencie


In [8]:
# Resume the conversation without a new question, passing `allow_run_tool=True`.
# Judging by the saved output, the agent then runs the pending
# `convert_to_pandas` call itself and summarises the rows — confirm the exact
# semantics of `allow_run_tool` against the PandasAgent documentation.
pandas_agent.stream(allow_run_tool=True)

Name: convert_to_pandas

  O_ORDERKEY O_CUSTKEY O_ORDERSTATUS O_TOTALPRICE O_ORDERDATE  \
0    3000001    145618             F     30175.88  1992-12-17   
1    3000002      1481             O    297999.63  1995-07-28   

   O_ORDERPRIORITY          O_CLERK O_SHIPPRIORITY  \
0  4-NOT SPECIFIED  Clerk#000000141              0   
1         1-URGENT  Clerk#000000547              0   

                                           O_COMMENT  
0  l packages. furiously careful instructions gro...  
1                      carefully unusual dependencie  

The first 2 rows of the data from the "ORDERS" table are as follows:

| O_ORDERKEY | O_CUSTKEY | O_ORDERSTATUS | O_TOTALPRICE | O_ORDERDATE | O_ORDERPRIORITY | O_CLERK | O_SHIPPRIORITY | O_COMMENT |
|------------|-----------|---------------|--------------|-------------|-----------------|---------|-----------------|-----------|
| 3000001 | 145618 | F | 30175.88 | 1992-12-17 | 4-NOT SPECIFIED | Clerk#000000141 | 0 | l packages. furiously careful in

<generator object Pregel.stream at 0x12ff10a70>

In [6]:
# Small in-memory customer table for the DataFrame-backed agent demo,
# written row by row: (customer key, customer name, total purchase).
df = pd.DataFrame(
    [
        (1, "Alice", 100),
        (2, "Bob", 200),
        (3, "Charlie", 300),
        (4, "David", 400),
        (5, "Eve", 500),
    ],
    columns=["c_custkey", "c_name", "total_purchase"],
)

In [7]:
# Build a second agent over the in-memory DataFrame instead of Snowflake.
# Each call's return value is rebound to `pandas_agent`, so the final binding
# matches what the equivalent chained expression would produce.
pandas_agent = PandasAgent("gpt-3.5-turbo")
pandas_agent = pandas_agent.set_prompt_template(BasicDataSourcePromptTemplate)
pandas_agent = pandas_agent.add_dataframe(df)

In [8]:
# Ask the same question of the DataFrame-backed agent.
# NOTE(review): the saved output for this cell ends in `KeyboardInterrupt`,
# i.e. this run was interrupted before a response was recorded; re-run the
# cell to capture a real response.
pandas_agent.stream("get the first 2 rows of the data using pandas")


get the first 2 rows of the data using pandas


KeyboardInterrupt: 

In [5]:
# Resume the conversation with `allow_run_tool=True`. The saved output shows a
# `pandas_analysis_tool` result for a `.head(2)` script on the registered
# frame, followed by the model's summary table.
# NOTE(review): presumably this runs the tool call left pending by the
# previous question — confirm against the PandasAgent documentation.
pandas_agent.stream(allow_run_tool=True)

Unnamed: 0,c_custkey,c_name,total_purchase
0,1,Alice,100
1,2,Bob,200


Name: pandas_analysis_tool

<ExecutionResult object at 146d7aff0, execution_count=None error_before_exec=None error_in_exec=None info=<ExecutionInfo object at 146d7b050, raw_cell="df_5423825936.head(2)" store_history=False silent=False shell_futures=True cell_id=None> result=   c_custkey c_name  total_purchase
0          1  Alice             100
1          2    Bob             200>

The first 2 rows of the data are as follows:

|   | c_custkey | c_name | total_purchase |
|---|-----------|--------|----------------|
| 0 | 1         | Alice  | 100            |
| 1 | 2         | Bob    | 200            |


<generator object Pregel.stream at 0x133077110>

In [4]:
# Ask the question through `iteratively_stream`. In the saved output a single
# call prints the proposed `pandas_analysis_tool` call, an "Iteration 1"
# marker, the tool result, and the final answer.
# NOTE(review): this looks like propose-then-run in one call, without a
# separate `allow_run_tool=True` step — confirm against the PandasAgent docs.
pandas_agent.iteratively_stream("get the first 2 rows of the data using pandas")

Starting the iterative invocation process.

get the first 2 rows of the data using pandas
Tool Calls:
  pandas_analysis_tool (call_QAfPCSX59jurI1wOCPwKeWZ2)
 Call ID: call_QAfPCSX59jurI1wOCPwKeWZ2
  Args:
    script: df_5920852640.head(2)
Iteration 1


Unnamed: 0,c_custkey,c_name,total_purchase
0,1,Alice,100
1,2,Bob,200


Name: pandas_analysis_tool

<ExecutionResult object at 161c852b0, execution_count=None error_before_exec=None error_in_exec=None info=<ExecutionInfo object at 161c85340, raw_cell="df_5920852640.head(2)" store_history=False silent=False shell_futures=True cell_id=None> result=   c_custkey c_name  total_purchase
0          1  Alice             100
1          2    Bob             200>

The first 2 rows of the data are:

| c_custkey | c_name | total_purchase |
|-----------|--------|----------------|
| 1         | Alice  | 100            |
| 2         | Bob    | 200            |


<generator object Pregel.stream at 0x1379b4af0>