In [1]:
import os

import pandas as pd
from dotenv import load_dotenv
from langchain.agents import AgentExecutor
from langchain.llms import LlamaCpp
from langchain.prompts import PromptTemplate
from langchain_experimental.tools.python.tool import PythonAstREPLTool
from langchain_openai import OpenAI

from custom_react_agent import create_react_agent
from prompt_template import llama_pandas_template

# Load variables from a .env file into the process environment before any of
# them are looked up.
load_dotenv()

# None when OPENAI_API_KEY is not set. No cell visible in this notebook passes
# this value on explicitly.
openai_api_key = os.environ.get("OPENAI_API_KEY")

In [2]:
# Hosted backend: OpenAI completion model with temperature set to 0.
# Note: the LlamaCpp cell in this notebook also assigns `llm`, so whichever of
# the two cells ran last decides which model the agent is built on.
llm = OpenAI(
    model="gpt-3.5-turbo-instruct",
    temperature=0,
)

In [2]:
# GGUF checkpoint to load. Alternatives kept for reference:
#   models/llama-2-7b-chat.Q5_K_M.gguf
#   models/llama-2-13b-chat.Q4_K_M.gguf
model_path = "models/mistral-7b-instruct-v0.2.Q5_K_M.gguf"

# Handed to LlamaCpp as n_ctx.
context_window = 4096
# Change this value based on your model and your GPU VRAM pool.
n_gpu_layers = 25
# Should be between 1 and n_ctx; consider the amount of VRAM in your GPU.
n_batch = 32

# Local backend: this assignment replaces any `llm` bound by an earlier cell.
llm = LlamaCpp(
    n_batch=n_batch,
    n_gpu_layers=n_gpu_layers,
    n_ctx=context_window,
    model_path=model_path,
)

AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


In [24]:
# Dataset that is handed to the agent's Python tool.
df = pd.read_csv("datasets/anonymized/iris.csv")

# Python REPL tool; code it executes sees the DataFrame under the name `df`.
shell = PythonAstREPLTool(locals=dict(df=df))

# Run the usual analysis imports once inside the tool so later snippets can
# refer to np / plt / pd / sns (packages from this env are available there).
shell.run(
    "import numpy as np; import matplotlib.pyplot as plt; import pandas as pd; import seaborn as sns"
)

# The REPL is the only tool offered to the agent.
tools = [shell]

# Prompt for the agent; the template text is imported from the prompt_template
# module and declares four input variables.
prompt = PromptTemplate(
    template=llama_pandas_template,
    input_variables=["agent_scratchpad", "input", "tools", "dfhead"],
)

# create_react_agent here is the project-local one from custom_react_agent,
# not the stock LangChain factory.
agent = create_react_agent(llm, tools, prompt)

# verbose=True prints the chain trace while the agent runs.
# NOTE(review): handle_parsing_errors=True presumably feeds malformed model
# output back to the model instead of raising — confirm against LangChain docs.
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    handle_parsing_errors=True,
    verbose=True,
)

In [25]:
# Send a single question through the agent executor and keep what it returns.
answer = agent_executor.invoke(
    dict(input="Extract the top 5 highest values from the first column.")
)



[1m> Entering new AgentExecutor chain...[0m


Llama.generate: prefix-match hit


[32;1m[1;3m I want to extract the top 5 highest values from the first column.
Action: python_repl_ast
Action Input: top5 = df['att_1'].nlargest(5)
[0m[36;1m[1;3m[0m

Llama.generate: prefix-match hit


[32;1m[1;3m I now know the final answer.
Final Answer: The top 5 highest values from the first column are: {top5}[0m

[1m> Finished chain.[0m


In [26]:
# Look at `top5` inside the REPL tool's own namespace: the agent's action
# assigned it there during the run above. `shell` is the same object as
# tools[0].
shell.run("print(top5)")

'2     7.11\n9     6.93\n12    6.93\n49    6.93\n67    6.93\nName: att_1, dtype: float64\n'