In [41]:
import phoenix as px
px.close_app()
# px.launch_app()
# import llama_index.core
# llama_index.core.set_global_handler("arize_phoenix")
# print(px.active_session())

In [28]:
from llama_index.core.query_pipeline import (
    QueryPipeline as QP,
    Link,
    InputComponent
)
from llama_index.core.query_engine.pandas import PandasInstructionParser
from llama_index.llms.ollama import Ollama
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.core import PromptTemplate

In [30]:
# !wget 'https://raw.githubusercontent.com/jerryjliu/llama_index/main/docs/docs/examples/data/csv/titanic_train.csv' -O 'titanic_train.csv'

In [31]:
import pandas as pd
df = pd.read_csv(r"D:\Gen_AI_Tutorials\langchain\llamaindexrag\csv_data\titanic_train.csv")

In [33]:
instruction_str = (
    "1. Convert the query to executable Python code using Pandas.\n"
    # that can be called with the `eval()` function.
    "2. The final line of code should be a Python expression.\n"
    "3. The code should represent a solution to the query.\n"
    "4. PRINT ONLY THE EXPRESSION.\n"
    "5. Do not quote the expression.\n"
)

pandas_prompt_str = (
    "You are working with a pandas dataframe in Python.\n"
    "The name of the dataframe is `df`.\n"
    "This is the result of `print(df.head())`:\n"
    "{df_str}\n\n"
    "Follow these instructions:\n"
    "{instruction_str}\n"
    "Query: {query_str}\n\n"
    "Expression:"
)
response_synthesis_prompt_str = (
    "Given an input question, synthesize a response from the query results.\n"
    "Query: {query_str}\n\n"
    "Pandas Instructions (optional):\n{pandas_instructions}\n\n"
    "Pandas Output: {pandas_output}\n\n"
    "Response: "
)

In [34]:
pandas_prompt = PromptTemplate(pandas_prompt_str).partial_format(
    instruction_str = instruction_str,
    df_str=df.head(5)
)

pandas_output_parser = PandasInstructionParser(df)
response_synthesis_prompt = PromptTemplate(response_synthesis_prompt_str)


In [35]:
# embedding Model
embed_model = OllamaEmbedding(model_name="nomic-embed-text")
# llm
llm= Ollama(model="phi",request_timeout=300)

In [36]:
qp = QP(
    modules={
        "input":InputComponent(),
        "pandas_prompt": pandas_prompt,
        "llm1": llm,
        "pandas_output_parser": pandas_output_parser,
        "response_synthesis_prompt": response_synthesis_prompt,
        "llm2": llm
    },
    verbose=True,
)

In [37]:
qp.add_chain(["input", "pandas_prompt", "llm1", "pandas_output_parser"])
qp.add_links([
    Link("input", "response_synthesis_prompt", dest_key="query_str"),
    Link("llm1", "response_synthesis_prompt", dest_key="pandas_instructions"),
    Link("pandas_output_parser", "response_synthesis_prompt", dest_key="pandas_output")
])

In [38]:
# add link from response synthesis prompt to llm2
qp.add_link("response_synthesis_prompt", "llm2")

In [22]:
## create graph
# from pyvis.network import Network

# net = Network(notebook=True, cdn_resources="in_line", directed=True)
# net.from_nx(qp.dag)
# net.show("panda_query_rag.html")

In [39]:

response = qp.run(
    query_str="What is the correlation between survival and age?",
)

[1;3;38;2;155;135;227m> Running module input with input: 
query_str: What is the correlation between survival and age?

[0m[1;3;38;2;155;135;227m> Running module pandas_prompt with input: 
query_str: What is the correlation between survival and age?

[0m[1;3;38;2;155;135;227m> Running module llm1 with input: 
messages: You are working with a pandas dataframe in Python.
The name of the dataframe is `df`.
This is the result of `print(df.head())`:
   survived  pclass  ... cabin embarked
0         0       3  ...   NaN  ...

[0m[1;3;38;2;155;135;227m> Running module pandas_output_parser with input: 
input: assistant:  A possible answer is:

# Convert the query to executable Python code using pandas
import pandas as pd
df = pd.read_csv('titanic.csv') # load the dataframe from a CSV file
correlation = df[...

[0m[1;3;38;2;155;135;227m> Running module response_synthesis_prompt with input: 
query_str: What is the correlation between survival and age?
pandas_instructions: assistant:  A 

Traceback (most recent call last):
  File "d:\Gen_AI_Tutorials\langchain\venv\lib\site-packages\llama_index\core\query_engine\pandas\output_parser.py", line 40, in default_output_processor
    tree = ast.parse(output)
  File "d:\Gen_AI_Tutorials\langchain\venv\lib\ast.py", line 50, in parse
    return compile(source, filename, mode, flags,
  File "<unknown>", line 1
    A possible answer is:
      ^^^^^^^^
SyntaxError: invalid syntax


In [40]:
print(response.message.content)

 I apologize for the error in executing the Pandas instructions. Let me try to answer your question using other methods. The correlation between survival and age is 0.058, which means that there is a weak positive relationship between the two variables. This suggests that older individuals are more likely to survive, but the effect is not very strong.

