In [2]:
from llama_index.core.query_pipeline import (
    QueryPipeline as QP,
    Link,
    InputComponent,
)
from llama_index.llms.ollama import Ollama

from llama_index.experimental.query_engine.pandas import (
    PandasInstructionParser,
)
from llama_index.core import PromptTemplate


In [3]:
import pandas as pd
# Load the CSV file titanic_train.csv into `df`. The path is relative, so it
# resolves against the kernel's current working directory.
df = pd.read_csv("../../data/titanic_train.csv")

In [13]:
# Preview the first rows to see which columns hold numbers and which hold text.
df.head()

Unnamed: 0,survived,pclass,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked
0,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [19]:
# Correlating the whole frame fails with a ValueError: corr() tries to convert
# every column to float, and text columns such as `name` cannot be converted.
df.corr('spearman')['survived']['age']


ValueError: could not convert string to float: 'Braund, Mr. Owen Harris'

In [17]:
# Selecting only `survived` and `age` (both numeric) before calling corr()
# avoids the string-conversion error; this is their Spearman correlation.
df[["survived", "age"]].corr('spearman')['survived']['age']

-0.052565300044694487

In [18]:
# Same two columns using corr()'s default method (Pearson, per the pandas
# docs), displayed as the full 2x2 matrix.
df[["survived", "age"]].corr()

Unnamed: 0,survived,age
survived,1.0,-0.077221
age,-0.077221,1.0


In [16]:
# Inspect the `survived` column on its own (values, length and dtype).
df['survived']


0      0
1      1
2      1
3      1
4      0
      ..
886    0
887    1
888    0
889    1
890    0
Name: survived, Length: 891, dtype: int64

In [4]:
# Observability: start the Arize Phoenix app, then register it as the global
# LlamaIndex handler so pipeline runs are logged there.
# NOTE(review): the recorded SyntaxError is raised from a package
# `__init__.py`, not from this cell -- possibly an incompatible `phoenix`
# distribution is installed instead of `arize-phoenix`; confirm the environment.
import llama_index.core
import phoenix as px

px.launch_app()
llama_index.core.set_global_handler("arize_phoenix")

SyntaxError: multiple exception types must be parenthesized (__init__.py, line 56)

In [None]:

# Numbered rules the LLM must follow when turning a question into pandas code.
# Every rule, including the last one, is terminated by a newline.
instruction_str = "".join(
    f"{rule}\n"
    for rule in (
        "1. Convert the query to executable Python code using Pandas.",
        "2. The final line of code should be a Python expression that can be called with the `eval()` function.",
        "3. The code should represent a solution to the query.",
        "4. PRINT ONLY THE EXPRESSION.",
        "5. Do not quote the expression.",
    )
)

# Code-generation prompt. `{df_str}`, `{instruction_str}` and `{query_str}` are
# template variables; the empty entries produce the blank lines in the prompt.
pandas_prompt_str = "\n".join(
    (
        "You are working with a pandas dataframe in Python.",
        "The name of the dataframe is `df`.",
        "This is the result of `print(df.head())`:",
        "{df_str}",
        "",
        "Follow these instructions:",
        "{instruction_str}",
        "Query: {query_str}",
        "",
        "Expression:",
    )
)

# Answer-synthesis prompt. It receives the original question, the generated
# pandas expression and the result of evaluating it. The trailing space after
# "Response:" is intentional and preserved.
response_synthesis_prompt_str = "\n".join(
    (
        "Given an input question, synthesize a response from the query results.",
        "Query: {query_str}",
        "",
        "Pandas Instructions (optional):",
        "{pandas_instructions}",
        "",
        "Pandas Output: {pandas_output}",
        "",
        "Response: ",
    )
)


In [None]:

# Ollama-served `llama3` model with a 120 s request timeout; the same object is
# used for both LLM steps of the pipeline.
llm = Ollama(model="llama3", request_timeout=120.0)

# Code-generation prompt with the instructions and a five-row preview of `df`
# bound up front, leaving only `query_str` to be supplied at run time.
pandas_prompt = PromptTemplate(pandas_prompt_str).partial_format(
    df_str=df.head(5),
    instruction_str=instruction_str,
)

# Output parser bound to `df`; it receives the text produced by the first LLM.
pandas_output_parser = PandasInstructionParser(df)

# Prompt used to phrase the final answer; all of its variables are filled by
# pipeline links.
response_synthesis_prompt = PromptTemplate(response_synthesis_prompt_str)


In [None]:
# Pipeline overview: the question fills the code-generation prompt, `llm1`
# writes a pandas expression, the parser processes it, and `llm2` phrases the
# final answer from the question, the expression and the parser output.
qp = QP(
    verbose=True,
    modules={
        "input": InputComponent(),
        "pandas_prompt": pandas_prompt,
        "llm1": llm,
        "pandas_output_parser": pandas_output_parser,
        "response_synthesis_prompt": response_synthesis_prompt,
        "llm2": llm,
    },
)

# Linear part: question -> prompt -> LLM -> parser.
qp.add_chain(["input", "pandas_prompt", "llm1", "pandas_output_parser"])

# Fan-in: each source module fills one variable of the synthesis prompt.
qp.add_links(
    [
        Link(source, "response_synthesis_prompt", dest_key=variable)
        for source, variable in (
            ("input", "query_str"),
            ("llm1", "pandas_instructions"),
            ("pandas_output_parser", "pandas_output"),
        )
    ]
)

# The completed synthesis prompt is passed to the second LLM step.
qp.add_link("response_synthesis_prompt", "llm2")

In [None]:
## create graph
# Draw the pipeline's graph (`qp.dag`) with pyvis as a directed network.
from pyvis.network import Network

net = Network(notebook=True, cdn_resources="in_line", directed=True)
# `qp.dag` is loaded through from_nx, i.e. it is handed over as a networkx graph.
net.from_nx(qp.dag)
# show() is given the HTML file name to use for the rendered graph.
net.show("rag_dag.html")

## another option using `pygraphviz`
# (the pipeline object in this notebook is `qp`, so the snippet uses `qp.dag`)
# from networkx.drawing.nx_agraph import to_agraph
# from IPython.display import Image
# agraph = to_agraph(qp.dag)
# agraph.layout(prog="dot")
# agraph.draw('rag_dag.png')
# display(Image('rag_dag.png'))

In [8]:
# Run the whole pipeline on one question. The pipeline was built with
# verbose=True, so each module's input is printed as it runs.
# In the recorded run the generated expression calls corr() on the full frame
# and fails on the text columns, so the parser output is an error message.
response = qp.run(
    query_str="What is the correlation between survival and age?",
)

[1;3;38;2;155;135;227m> Running module input with input: 
query_str: What is the correlation between survival and age?

[0m[1;3;38;2;155;135;227m> Running module pandas_prompt with input: 
query_str: What is the correlation between survival and age?

[0m[1;3;38;2;155;135;227m> Running module llm1 with input: 
messages: You are working with a pandas dataframe in Python.
The name of the dataframe is `df`.
This is the result of `print(df.head())`:
   survived  pclass                                               name  ...

[0m[1;3;38;2;155;135;227m> Running module pandas_output_parser with input: 
input: assistant: df.corr('survived', 'age')

[0m[1;3;38;2;155;135;227m> Running module response_synthesis_prompt with input: 
query_str: What is the correlation between survival and age?
pandas_instructions: assistant: df.corr('survived', 'age')
pandas_output: There was an error running the output as Python code. Error message: could not convert string to float: 'Braund, Mr. Owen Harri

Traceback (most recent call last):
  File "/Users/lzchen/Library/Caches/pypoetry/virtualenvs/llamaindex-sample-BTxt78Z_-py3.10/lib/python3.10/site-packages/llama_index/experimental/query_engine/pandas/output_parser.py", line 54, in default_output_processor
    output_str = str(safe_eval(module_end_str, global_vars, local_vars))
  File "/Users/lzchen/Library/Caches/pypoetry/virtualenvs/llamaindex-sample-BTxt78Z_-py3.10/lib/python3.10/site-packages/llama_index/experimental/exec_utils.py", line 159, in safe_eval
    return eval(__source, _get_restricted_globals(__globals), __locals)
  File "<string>", line 1, in <module>
  File "/Users/lzchen/Library/Caches/pypoetry/virtualenvs/llamaindex-sample-BTxt78Z_-py3.10/lib/python3.10/site-packages/pandas/core/frame.py", line 11049, in corr
    mat = data.to_numpy(dtype=float, na_value=np.nan, copy=False)
  File "/Users/lzchen/Library/Caches/pypoetry/virtualenvs/llamaindex-sample-BTxt78Z_-py3.10/lib/python3.10/site-packages/pandas/core/frame.py", 

In [9]:
# Same question as the plain run, but run_with_intermediates returns a pair:
# the final output plus the intermediates, unpacked here into two names.
output, intermediates = qp.run_with_intermediates(
    query_str="What is the correlation between survival and age?",
)


[1;3;38;2;155;135;227m> Running module input with input: 
query_str: What is the correlation between survival and age?

[0m[1;3;38;2;155;135;227m> Running module pandas_prompt with input: 
query_str: What is the correlation between survival and age?

[0m[1;3;38;2;155;135;227m> Running module llm1 with input: 
messages: You are working with a pandas dataframe in Python.
The name of the dataframe is `df`.
This is the result of `print(df.head())`:
   survived  pclass                                               name  ...

[0m[1;3;38;2;155;135;227m> Running module pandas_output_parser with input: 
input: assistant: df.corr('spearman')['survived']['age']

[0m[1;3;38;2;155;135;227m> Running module response_synthesis_prompt with input: 
query_str: What is the correlation between survival and age?
pandas_instructions: assistant: df.corr('spearman')['survived']['age']
pandas_output: There was an error running the output as Python code. Error message: could not convert string to float:

Traceback (most recent call last):
  File "/Users/lzchen/Library/Caches/pypoetry/virtualenvs/llamaindex-sample-BTxt78Z_-py3.10/lib/python3.10/site-packages/llama_index/experimental/query_engine/pandas/output_parser.py", line 54, in default_output_processor
    output_str = str(safe_eval(module_end_str, global_vars, local_vars))
  File "/Users/lzchen/Library/Caches/pypoetry/virtualenvs/llamaindex-sample-BTxt78Z_-py3.10/lib/python3.10/site-packages/llama_index/experimental/exec_utils.py", line 159, in safe_eval
    return eval(__source, _get_restricted_globals(__globals), __locals)
  File "<string>", line 1, in <module>
  File "/Users/lzchen/Library/Caches/pypoetry/virtualenvs/llamaindex-sample-BTxt78Z_-py3.10/lib/python3.10/site-packages/pandas/core/frame.py", line 11049, in corr
    mat = data.to_numpy(dtype=float, na_value=np.nan, copy=False)
  File "/Users/lzchen/Library/Caches/pypoetry/virtualenvs/llamaindex-sample-BTxt78Z_-py3.10/lib/python3.10/site-packages/pandas/core/frame.py", 