In [1]:
import pandas as pd

import os
from aita.agent.pandas import PandasAgent
from aita.datasource.snowflake import SnowflakeDataSource
from aita.prompt.base import BasicDataSourcePromptTemplate

In [2]:
# Snowflake connection settings are pulled from the process environment so
# that no credentials are stored in the notebook itself. A variable that is
# not set resolves to None rather than raising.
SNOWFLAKE_USER = os.getenv("SNOWFLAKE_USER")
SNOWFLAKE_PASSWORD = os.getenv("SNOWFLAKE_PASSWORD")
SNOWFLAKE_ACCOUNT = os.getenv("SNOWFLAKE_ACCOUNT")
SNOWFLAKE_WAREHOUSE = os.getenv("SNOWFLAKE_WAREHOUSE")
SNOWFLAKE_DATABASE = os.getenv("SNOWFLAKE_DATABASE")
SNOWFLAKE_SCHEMA = os.getenv("SNOWFLAKE_SCHEMA")
SNOWFLAKE_ROLE = os.getenv("SNOWFLAKE_ROLE")

In [3]:
# Build the Snowflake data source from the environment-derived settings.
# Every setting is passed by keyword, so the grouping below is purely for
# readability.
sf_datasource = SnowflakeDataSource(
    # who connects, and to which account
    account=SNOWFLAKE_ACCOUNT,
    user=SNOWFLAKE_USER,
    password=SNOWFLAKE_PASSWORD,
    role=SNOWFLAKE_ROLE,
    # session context
    warehouse=SNOWFLAKE_WAREHOUSE,
    database=SNOWFLAKE_DATABASE,
    schema=SNOWFLAKE_SCHEMA,
)

In [4]:
# Create a pandas agent on the "gpt-3.5-turbo" model, attach the basic
# data-source prompt template, and register the Snowflake data source.
# NOTE(review): a later cell in this notebook configures the agent with
# `set_prompt_context(...)` and drives it via `stream(...)` instead of
# `set_base_prompt_template(...)` / `chat(...)` — confirm which API the
# installed aita version actually exposes.
pandas_agent = (
    PandasAgent("gpt-3.5-turbo")
    .set_base_prompt_template(BasicDataSourcePromptTemplate)
    .add_datasource(sf_datasource)
)

In [5]:
# Ask the agent a question. Per the captured output, the conversation trace
# (including the proposed tool call and its Call ID) is printed while the call
# runs. The trailing semicolon stops IPython from echoing the returned
# generator object's repr as the cell result.
pandas_agent.chat("get the first 2 rows of the data using pandas");


get the first 2 rows of the data using pandas
Tool Calls:
  convert_to_pandas (call_UXwZeNfcizgHWYW7hgWr44ud)
 Call ID: call_UXwZeNfcizgHWYW7hgWr44ud
  Args:
    query: SELECT * FROM SNOWFLAKE_SAMPLE_DATA.TPCH_SF1.ORDERS LIMIT 2;


<generator object Pregel.stream at 0x12fb1a340>

In [6]:
# Manually execute the tool call the agent proposed in the previous cell and
# keep its result.
# NOTE(review): the tool_id is copied from the "Call ID" printed by the
# previous cell's run; it presumably changes on every fresh run, so this cell
# is unlikely to survive Restart & Run All unchanged — confirm and consider
# reading the pending call ID from the agent instead.
df = pandas_agent.call_tool(
    "convert_to_pandas",
    tool_id="call_UXwZeNfcizgHWYW7hgWr44ud",
)

In [7]:
# Preview the result returned by the tool call (head() shows at most 5 rows).
df.head()

Unnamed: 0,O_ORDERKEY,O_CUSTKEY,O_ORDERSTATUS,O_TOTALPRICE,O_ORDERDATE,O_ORDERPRIORITY,O_CLERK,O_SHIPPRIORITY,O_COMMENT
0,3000001,145618,F,30175.88,1992-12-17,4-NOT SPECIFIED,Clerk#000000141,0,l packages. furiously careful instructions gro...
1,3000002,1481,O,297999.63,1995-07-28,1-URGENT,Clerk#000000547,0,carefully unusual dependencie


In [8]:
# Continue the conversation without a new question, letting the agent run the
# tool itself (allow_run_tool=True). Per the captured output, the tool result
# and the model's summary are printed while the call runs. The trailing
# semicolon stops IPython from echoing the returned generator object's repr.
pandas_agent.chat(allow_run_tool=True);

Name: convert_to_pandas

  O_ORDERKEY O_CUSTKEY O_ORDERSTATUS O_TOTALPRICE O_ORDERDATE  \
0    3000001    145618             F     30175.88  1992-12-17   
1    3000002      1481             O    297999.63  1995-07-28   

   O_ORDERPRIORITY          O_CLERK O_SHIPPRIORITY  \
0  4-NOT SPECIFIED  Clerk#000000141              0   
1         1-URGENT  Clerk#000000547              0   

                                           O_COMMENT  
0  l packages. furiously careful instructions gro...  
1                      carefully unusual dependencie  

The first 2 rows of the data from the "ORDERS" table are as follows:

| O_ORDERKEY | O_CUSTKEY | O_ORDERSTATUS | O_TOTALPRICE | O_ORDERDATE | O_ORDERPRIORITY | O_CLERK | O_SHIPPRIORITY | O_COMMENT |
|------------|-----------|---------------|--------------|-------------|-----------------|---------|-----------------|-----------|
| 3000001 | 145618 | F | 30175.88 | 1992-12-17 | 4-NOT SPECIFIED | Clerk#000000141 | 0 | l packages. furiously careful in

<generator object Pregel.stream at 0x12ff10a70>

In [3]:
# Small in-memory customer table used to demo the agent without a database.
# Rows are listed record-by-record; column order matches the `columns` list.
df = pd.DataFrame(
    data=[
        (1, "Alice", 100),
        (2, "Bob", 200),
        (3, "Charlie", 300),
        (4, "David", 400),
        (5, "Eve", 500),
    ],
    columns=["c_custkey", "c_name", "total_purchase"],
)

In [4]:
# Create a pandas agent on the "gpt-3.5-turbo" model, give it the basic
# data-source prompt template as prompt context, and register the in-memory
# DataFrame with it.
pandas_agent = (
    PandasAgent("gpt-3.5-turbo")
    .set_prompt_context(BasicDataSourcePromptTemplate)
    .add_dataframe(df)
)

In [5]:
# Ask the agent a question. Per the captured output, the conversation trace
# (including the proposed pandas_tool call) is printed while the call runs.
# The trailing semicolon stops IPython from echoing the returned generator
# object's repr as the cell result.
pandas_agent.stream("get the first 2 rows of the data using pandas");


get the first 2 rows of the data using pandas
Tool Calls:
  pandas_tool (call_BcA17nIA0D6YZotG5sh44h2s)
 Call ID: call_BcA17nIA0D6YZotG5sh44h2s
  Args:
    script: df_6228688864.head(2)


<generator object Pregel.stream at 0x173d38310>

In [6]:
# Continue the conversation without a new question; with tool_mode="once" the
# captured output shows the pending pandas_tool call being executed and the
# model summarising its result. The trailing semicolon stops IPython from
# echoing the returned generator object's repr as the cell result.
pandas_agent.stream(tool_mode="once");

Unnamed: 0,c_custkey,c_name,total_purchase
0,1,Alice,100
1,2,Bob,200



get the first 2 rows of the data using pandas
Tool Calls:
  pandas_tool (call_BcA17nIA0D6YZotG5sh44h2s)
 Call ID: call_BcA17nIA0D6YZotG5sh44h2s
  Args:
    script: df_6228688864.head(2)
Name: pandas_tool

<ExecutionResult object at 174327d10, execution_count=None error_before_exec=None error_in_exec=None info=<ExecutionInfo object at 174327da0, raw_cell="df_6228688864.head(2)" store_history=False silent=False shell_futures=True cell_id=None> result=   c_custkey c_name  total_purchase
0          1  Alice             100
1          2    Bob             200>

The first 2 rows of the data are as follows:

| c_custkey | c_name | total_purchase |
|-----------|--------|----------------|
| 1         | Alice  | 100            |
| 2         | Bob    | 200            |


<generator object Pregel.stream at 0x10be256d0>

In [8]:
# Ask the question again with tool_mode="continuous"; per the captured output
# the agent proposes the tool call, runs it ("Iteration 1"), and summarises
# the result within this single call. The trailing semicolon stops IPython
# from echoing the returned generator object's repr as the cell result.
pandas_agent.stream("get the first 2 rows of the data using pandas", tool_mode="continuous");


get the first 2 rows of the data using pandas
Tool Calls:
  pandas_tool (call_BcA17nIA0D6YZotG5sh44h2s)
 Call ID: call_BcA17nIA0D6YZotG5sh44h2s
  Args:
    script: df_6228688864.head(2)
Name: pandas_tool

<ExecutionResult object at 174327d10, execution_count=None error_before_exec=None error_in_exec=None info=<ExecutionInfo object at 174327da0, raw_cell="df_6228688864.head(2)" store_history=False silent=False shell_futures=True cell_id=None> result=   c_custkey c_name  total_purchase
0          1  Alice             100
1          2    Bob             200>

The first 2 rows of the data are as follows:

| c_custkey | c_name | total_purchase |
|-----------|--------|----------------|
| 1         | Alice  | 100            |
| 2         | Bob    | 200            |

get the first 2 rows of the data using pandas
Tool Calls:
  pandas_tool (call_7EdbGXbzdUkTW6duRElQtos1)
 Call ID: call_7EdbGXbzdUkTW6duRElQtos1
  Args:
    script: df_6228688864.head(2)
Iteration 1


Unnamed: 0,c_custkey,c_name,total_purchase
0,1,Alice,100
1,2,Bob,200


Name: pandas_tool

<ExecutionResult object at 172792330, execution_count=None error_before_exec=None error_in_exec=None info=<ExecutionInfo object at 172792420, raw_cell="df_6228688864.head(2)" store_history=False silent=False shell_futures=True cell_id=None> result=   c_custkey c_name  total_purchase
0          1  Alice             100
1          2    Bob             200>

The first 2 rows of the data are as follows:

| c_custkey | c_name | total_purchase |
|-----------|--------|----------------|
| 1         | Alice  | 100            |
| 2         | Bob    | 200            |


<generator object Pregel.stream at 0x10bea3d40>

In [9]:
from langchain_core.output_parsers.openai_tools import PydanticToolsParser
from langchain_core.pydantic_v1 import BaseModel, Field

In [10]:
class AnswerQuestion(BaseModel):
    """Pydantic schema describing a structured answer to a question."""

    # Free-text answer; the Field description is part of the generated schema.
    answer: str = Field(description="The answer to the question")