In [2]:
!pip uninstall langchain -Y
!pip install -q langchain==0.3.22 langchain-aws==0.2.18 langchain-community==0.3.20 langchain-core==0.3.49 langchain-experimental==0.3.4 langgraph


Usage:   
  pip uninstall [options] <package> ...
  pip uninstall [options] -r <requirements file> ...

no such option: -Y


In [4]:
from langchain.chat_models import init_chat_model
import pydantic


# Initialize BedrockChat model
llm = init_chat_model(
    "amazon.nova-pro-v1:0",
    model_provider="bedrock",
    model_kwargs={'temperature': 0.7}
)


In [3]:
import pandas as pd
from langchain_core.prompts import ChatPromptTemplate
from langchain_experimental.tools import PythonAstREPLTool

df = pd.read_csv("titanic.csv")
tool = PythonAstREPLTool(locals={"df": df})
tool.invoke("df['Fare'].mean()")

32.204207968574636

In [5]:
llm_with_tools = llm.bind_tools([tool], tool_choice=tool.name)
response = llm_with_tools.invoke(
    "I have a dataframe 'df' and want to know the correlation between the 'Age' and 'Fare' columns"
)
response

AIMessage(content=[{'type': 'tool_use', 'name': 'python_repl_ast', 'input': {'query': "import pandas as pd\ndf = pd.DataFrame({'Age': [22, 35, 45, 29, 31], 'Fare': [7.25, 14.5, 31.0, 7.925, 26.55]})\ndf['Age'].corr(df['Fare'])"}, 'id': 'tooluse_Pdt6-wMOSMmjtVCaLseKPQ'}], additional_kwargs={}, response_metadata={'ResponseMetadata': {'RequestId': '91363bf8-0fc1-4acd-babe-4539c6215aea', 'HTTPStatusCode': 200, 'HTTPHeaders': {'date': 'Wed, 21 May 2025 22:03:58 GMT', 'content-type': 'application/json', 'content-length': '416', 'connection': 'keep-alive', 'x-amzn-requestid': '91363bf8-0fc1-4acd-babe-4539c6215aea'}, 'RetryAttempts': 0}, 'stopReason': 'tool_use', 'metrics': {'latencyMs': [934]}, 'model_name': 'amazon.nova-pro-v1:0'}, id='run-a2d4c4b9-a6ba-45bd-a492-9936d63b4500-0', tool_calls=[{'name': 'python_repl_ast', 'args': {'query': "import pandas as pd\ndf = pd.DataFrame({'Age': [22, 35, 45, 29, 31], 'Fare': [7.25, 14.5, 31.0, 7.925, 26.55]})\ndf['Age'].corr(df['Fare'])"}, 'id': 'toolus

In [6]:
response.tool_calls

[{'name': 'python_repl_ast',
  'args': {'query': "import pandas as pd\ndf = pd.DataFrame({'Age': [22, 35, 45, 29, 31], 'Fare': [7.25, 14.5, 31.0, 7.925, 26.55]})\ndf['Age'].corr(df['Fare'])"},
  'id': 'tooluse_Pdt6-wMOSMmjtVCaLseKPQ',
  'type': 'tool_call'}]

In [12]:
response.tool_calls[0]["id"]

'tooluse_Pdt6-wMOSMmjtVCaLseKPQ'

In [7]:
from langchain_core.output_parsers.openai_tools import JsonOutputKeyToolsParser

parser = JsonOutputKeyToolsParser(key_name=tool.name, first_tool_only=True)
(llm_with_tools | parser).invoke(
    "I have a dataframe 'df' and want to know the correlation between the 'Age' and 'Fare' columns"
)

{'query': "import pandas as pd\ndf = pd.DataFrame({'Age': [22, 35, 45, 60, 28], 'Fare': [7.25, 71.28, 7.92, 53.15, 8.05]})\ndf.corr()"}

In [8]:
system = f"""You have access to a pandas dataframe `df`. \
Here is the output of `df.head().to_markdown()`:

\`\`\`
{df.head().to_markdown()}
\`\`\`

Given a user question, write the Python code to answer it. \
Return ONLY the valid Python code and nothing else. \
Don't assume you have access to any libraries other than built-in Python ones and pandas."""
prompt = ChatPromptTemplate.from_messages([("system", system), ("human", "{question}")])
code_chain = prompt | llm_with_tools | parser
code_chain.invoke({"question": "What's the correlation between age and fare"})

{'query': "df['Age'].corr(df['Fare'])"}

In [24]:
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [27]:
from operator import itemgetter

from langchain_core.messages import ToolMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import MessagesPlaceholder
from langchain_core.runnables import RunnablePassthrough

system = f"""You have access to a pandas dataframe `df`, by running df = pd.read_csv("titanic.csv"). \
Here is the output of `df.head().to_markdown()`:

\`\`\`
{df.head().to_markdown()}
\`\`\`

Given a user question, write the Python code to answer it. \
Don't assume you have access to any libraries other than built-in Python ones and pandas.
Respond directly to the question once you have enough information to answer it."""
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            system,
        ),
        ("human", "{question}"),
        # This MessagesPlaceholder allows us to optionally append an arbitrary number of messages
        # at the end of the prompt using the 'chat_history' arg.
        MessagesPlaceholder("chat_history", optional=True),
    ]
)


In [28]:
prompt

ChatPromptTemplate(input_variables=['question'], optional_variables=['chat_history'], input_types={'chat_history': list[typing.Annotated[typing.Union[typing.Annotated[langchain_core.messages.ai.AIMessage, Tag(tag='ai')], typing.Annotated[langchain_core.messages.human.HumanMessage, Tag(tag='human')], typing.Annotated[langchain_core.messages.chat.ChatMessage, Tag(tag='chat')], typing.Annotated[langchain_core.messages.system.SystemMessage, Tag(tag='system')], typing.Annotated[langchain_core.messages.function.FunctionMessage, Tag(tag='function')], typing.Annotated[langchain_core.messages.tool.ToolMessage, Tag(tag='tool')], typing.Annotated[langchain_core.messages.ai.AIMessageChunk, Tag(tag='AIMessageChunk')], typing.Annotated[langchain_core.messages.human.HumanMessageChunk, Tag(tag='HumanMessageChunk')], typing.Annotated[langchain_core.messages.chat.ChatMessageChunk, Tag(tag='ChatMessageChunk')], typing.Annotated[langchain_core.messages.system.SystemMessageChunk, Tag(tag='SystemMessageChun

In [33]:
def _get_chat_history(x: dict) -> list:
    """Parse the chain output up to this point into a list of chat history messages to insert in the prompt."""
    ai_msg = x["ai_msg"]
    tool_call_id = ai_msg.tool_calls[0]["id"]
    tool_msg = ToolMessage(tool_call_id=tool_call_id, content=str(x["tool_output"]))
    print([ai_msg, tool_msg])
    return [ai_msg, tool_msg]


chain = (
    RunnablePassthrough.assign(ai_msg=prompt | llm_with_tools)
    .assign(tool_output=itemgetter("ai_msg") | parser | tool)
    .assign(chat_history=_get_chat_history)
    #.assign(response=prompt | llm | StrOutputParser())
    .pick(["tool_output", "response"])
)

In [34]:
chain.invoke({"question": "What's the correlation between age and fare"})

[AIMessage(content=[{'type': 'tool_use', 'name': 'python_repl_ast', 'input': {'query': 'import pandas as pd\ndf = pd.read_csv("titanic.csv")\ndf[\'Age\'].corr(df[\'Fare\'])'}, 'id': 'tooluse_8g-afeaZQ520Z-QR79k6SQ'}], additional_kwargs={}, response_metadata={'ResponseMetadata': {'RequestId': '00a5c223-c30b-4754-8e15-747826e303d9', 'HTTPStatusCode': 200, 'HTTPHeaders': {'date': 'Wed, 21 May 2025 22:44:59 GMT', 'content-type': 'application/json', 'content-length': '359', 'connection': 'keep-alive', 'x-amzn-requestid': '00a5c223-c30b-4754-8e15-747826e303d9'}, 'RetryAttempts': 0}, 'stopReason': 'tool_use', 'metrics': {'latencyMs': [502]}, 'model_name': 'amazon.nova-pro-v1:0'}, id='run-167a6529-993d-4e4b-9e81-f668fba17ad3-0', tool_calls=[{'name': 'python_repl_ast', 'args': {'query': 'import pandas as pd\ndf = pd.read_csv("titanic.csv")\ndf[\'Age\'].corr(df[\'Fare\'])'}, 'id': 'tooluse_8g-afeaZQ520Z-QR79k6SQ', 'type': 'tool_call'}], usage_metadata={'input_tokens': 957, 'output_tokens': 84, '

{'tool_output': 0.09606669176903888}

In [18]:
from langchain_experimental.agents import create_pandas_dataframe_agent

agent = create_pandas_dataframe_agent(
    llm, df, agent_type="openai-tools", verbose=True, allow_dangerous_code=True
)
agent.invoke(
    {
        "input": "What's the correlation between age and fare? is that greater than the correlation between fare and survival?"
    }
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `python_repl_ast` with `{'query': "age_fare_corr = df['Age'].corr(df['Fare'])\nfare_survival_corr = df['Fare'].corr(df['Survived'])\n(age_fare_corr, fare_survival_corr)"}`
responded: [{'type': 'text', 'text': '<thinking> To determine the correlation between age and fare, and compare it with the correlation between fare and survival, I need to calculate the correlation coefficients for both pairs of columns. The correlation coefficient is a measure of the linear relationship between two variables, ranging from -1 to 1. A value of 1 indicates a strong positive relationship, -1 indicates a strong negative relationship, and 0 indicates no relationship.\n\nFirst, I will calculate the correlation between age and fare using the `corr()` function. Then, I will calculate the correlation between fare and survival. Finally, I will compare the two correlation coefficients to determine if the correlation between age and fare is

{'input': "What's the correlation between age and fare? is that greater than the correlation between fare and survival?",
 'output': [{'type': 'text',
   'text': 'The correlation between age and fare is not greater than the correlation between fare and survival. The correlation between age and fare is approximately 0.096, while the correlation between fare and survival is approximately 0.257. Therefore, the correlation between fare and survival is stronger than the correlation between age and fare.',
   'index': 0}]}

In [19]:
df_1 = df[["Age", "Fare"]]
df_2 = df[["Fare", "Survived"]]

tool = PythonAstREPLTool(locals={"df_1": df_1, "df_2": df_2})
llm_with_tool = llm.bind_tools(tools=[tool], tool_choice=tool.name)
df_template = """\`\`\`python
{df_name}.head().to_markdown()
>>> {df_head}
\`\`\`"""
df_context = "\n\n".join(
    df_template.format(df_head=_df.head().to_markdown(), df_name=df_name)
    for _df, df_name in [(df_1, "df_1"), (df_2, "df_2")]
)

system = f"""You have access to a number of pandas dataframes. \
Here is a sample of rows from each dataframe and the python code that was used to generate the sample:

{df_context}

Given a user question about the dataframes, write the Python code to answer it. \
Don't assume you have access to any libraries other than built-in Python ones and pandas. \
Make sure to refer only to the variables mentioned above."""
prompt = ChatPromptTemplate.from_messages([("system", system), ("human", "{question}")])

chain = prompt | llm_with_tool | parser | tool
chain.invoke(
    {
        "question": "return the difference in the correlation between age and fare and the correlation between fare and survival"
    }
)

0.07603993061334446