# CSV file with Pandas Agent

In [None]:
import os
import openai
from IPython.display import display, HTML, Markdown
from pprint import pprint

from dotenv import load_dotenv, find_dotenv
# Read the local .env file; the return value is deliberately discarded into `_`.
_ = load_dotenv(find_dotenv())
# Raises KeyError at this point if OPENAI_API_KEY is not present in the environment.
openai.api_key = os.environ["OPENAI_API_KEY"]

In [None]:
from langchain.callbacks import OpenAICallbackHandler

# One callback handler instance shared by every agent.run(...) call in this notebook.
totals_cb = OpenAICallbackHandler()

# Print the handler before any agent call has been made (it is printed again at the end).
print(totals_cb)

### Download example CSV data set

Download the `train.csv` file from the [Kaggle Spaceship Titanic Competition](https://www.kaggle.com/competitions/spaceship-titanic/data?select=train.csv) into the same folder as this notebook.

A [Kaggle](https://www.kaggle.com) account is required: sign up and sign in before downloading the file.

In [None]:
import pandas as pd

# Load train.csv via a relative path, i.e. from the current working directory.
df = pd.read_csv("train.csv")

# Show the last rows of the dataframe as the cell output.
df.tail()

In [None]:
from langchain.memory import ConversationBufferWindowMemory
from langchain.chat_models import ChatOpenAI
from langchain.prompts import MessagesPlaceholder

# Windowed conversation memory (k=3) stored under the "chat_history" key,
# with return_messages enabled.
memory = ConversationBufferWindowMemory(
    memory_key="chat_history",
    return_messages=True,
    k=3,
)

# Extra prompt placeholder whose variable name matches the memory key above.
agent_kwargs = dict(
    extra_prompt_messages=[MessagesPlaceholder(variable_name="chat_history")],
)

In [None]:
# Keyword arguments handed to create_pandas_dataframe_agent(agent_executor_kwargs=...).
agent_executor_kwargs = dict(handle_parsing_errors=True)

In [None]:
from langchain.agents import create_pandas_dataframe_agent
from langchain.chat_models import ChatOpenAI
from langchain.agents.agent_types import AgentType

# Build a pandas dataframe agent over `df`, backed by gpt-3.5-turbo at temperature 0,
# using the OPENAI_FUNCTIONS agent type with verbose output enabled.
# NOTE(review): `agent_kwargs` (the "chat_history" MessagesPlaceholder defined in an
# earlier cell) is not passed to this call, and `memory` is given as a top-level
# keyword rather than inside `agent_executor_kwargs` -- confirm against the installed
# langchain version that the conversation memory actually reaches the executor and prompt.
agent = create_pandas_dataframe_agent(
    ChatOpenAI(temperature=0, model="gpt-3.5-turbo"),
    df,
    verbose=True,
    agent_type=AgentType.OPENAI_FUNCTIONS,
    memory=memory,
    agent_executor_kwargs=agent_executor_kwargs,
)

In [None]:
# Pretty-print the tools attribute of the agent wrapped by the executor.
pprint(agent.agent.tools)

In [None]:
# Pretty-print the full prompt object of the wrapped agent.
pprint(agent.agent.prompt)

In [None]:
# Render the content of the first prompt message as Markdown.
display(Markdown(agent.agent.prompt.messages[0].content))

In [None]:
import tiktoken

# Encoding that tiktoken associates with gpt-3.5-turbo (the model name given to ChatOpenAI).
encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
# Print how many tokens the first prompt message's content encodes to.
print(len(encoding.encode(agent.agent.prompt.messages[0].content)))

In [None]:
# Ask the agent for a description of the dataframe, passing the shared totals_cb
# callback, then render the returned text as Markdown.
response = agent.run(
    "Describe the dataframe",
    callbacks=[totals_cb],
)
display(Markdown(response))

In [None]:
# Ask the agent to analyse missing values, passing the shared totals_cb callback,
# then render the returned text as Markdown.
response = agent.run(
    "Analyse missing values",
    callbacks=[totals_cb],
)
display(Markdown(response))

In [None]:
# Ask the agent about the home planet distribution, passing the shared totals_cb
# callback, then render the returned text as Markdown.
response = agent.run(
    "What is the distribution of home planet?",
    callbacks=[totals_cb],
)
display(Markdown(response))

In [None]:
# Ask the agent how many VIP passengers are from Mars, passing the shared totals_cb
# callback, then render the returned text as Markdown.
response = agent.run("How many VIP passengers are there from Mars?", callbacks=[totals_cb])

display(Markdown(response))

In [None]:
# Tell the agent that the deck is the first letter of the cabin name and ask how many
# decks exist; the shared totals_cb callback is passed and the answer rendered as Markdown.
response = agent.run(
    "Ship deck is encoded as first letter in cabin name. How many decks are there?",
    callbacks=[totals_cb],
)
display(Markdown(response))

In [None]:
# Ask the agent on which decks passengers from Mars have cabins, passing the shared
# totals_cb callback, then render the returned text as Markdown.
response = agent.run("On which decks (A, B, C, ...) passengers from Mars have cabins?", callbacks=[totals_cb])

display(Markdown(response))

In [None]:
# Ask the agent which planet's passengers spent the most at the Food Court, passing the
# shared totals_cb callback, then render the returned text as Markdown.
response = agent.run("Passengers from which planet have spent most at Food Court?", callbacks=[totals_cb])

display(Markdown(response))

In [None]:
# Ask the agent which destination's passengers spent the most at VRDeck, passing the
# shared totals_cb callback, then render the returned text as Markdown.
response = agent.run(
    "Passengers going to which destination spent most at VRDeck?",
    callbacks=[totals_cb],
)
display(Markdown(response))

In [None]:
# Ask the agent to plot the destination distribution as a pie chart, passing the shared
# totals_cb callback; the text it returns is rendered as Markdown.
response = agent.run(
    "Plot distribution of destination as pie chart",
    callbacks=[totals_cb],
)
display(Markdown(response))

In [None]:
# Print the callback handler again, now that it has been passed to every agent.run call above.
print(totals_cb)