# QA with Pandas User Guide - Use Persistent Vector Database

This notebook assumes that a Chroma DB containing the *Pandas User Guide* has already been persisted in the `./chroma_db/pandas` folder.

See the notebook `QA with Pandas User Guide - Build Persistent Vector Database` for how to build it.

In [None]:
import os
import openai
from IPython.display import display, HTML, Markdown
from pprint import pprint

from dotenv import load_dotenv, find_dotenv
# Locate a local .env file and read its variables into the process environment.
dotenv_path = find_dotenv()
_ = load_dotenv(dotenv_path)

# Hand the key to the openai client; a missing variable raises KeyError here.
openai.api_key = os.environ["OPENAI_API_KEY"]

In [None]:
from langchain.callbacks import OpenAICallbackHandler

# Single callback handler that is passed (via `callbacks=[totals_cb]`) to every
# qa_chain call in this notebook.
totals_cb = OpenAICallbackHandler()

# Show the handler's state before any question has been asked.
print(totals_cb)

In [None]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma

# Folder holding the persisted vector store. It is expected to have been created
# beforehand by the "Build Persistent Vector Database" notebook.
CHROMA_DIR = "./chroma_db/pandas"

# Fail fast when the store is missing: opening a non-existent directory would
# otherwise give an empty collection, and every question would be answered
# without any retrieved context.
if not os.path.isdir(CHROMA_DIR):
    raise FileNotFoundError(
        f"Chroma DB not found at {CHROMA_DIR!r}. Run the notebook "
        "'QA with Pandas User Guide - Build Persistent Vector Database' first."
    )

# NOTE(review): this embedding model presumably has to match the one used when
# the DB was built — confirm against the build notebook.
embeddings = HuggingFaceEmbeddings(model_name="multi-qa-MiniLM-L6-cos-v1")

# Open the persisted store, embedding queries with the model above.
db = Chroma(
    embedding_function=embeddings,
    persist_directory=CHROMA_DIR,
)

In [None]:
from langchain.memory import ConversationSummaryBufferMemory
from langchain.chat_models import ChatOpenAI

# Conversation memory for the QA chain. It is exposed under the "chat_history"
# key and returns the history as message objects (return_messages=True).
# The `llm` is the model handed to the memory for summarising older turns.
#
# NOTE: the former `k=2` argument was removed. `k` is the window size of
# ConversationBufferWindowMemory; ConversationSummaryBufferMemory has no such
# field and bounds its buffer with `max_token_limit` instead. That limit is left
# at the library default here — TODO confirm the default suits this notebook.
memory = ConversationSummaryBufferMemory(
    llm=ChatOpenAI(temperature=0.0),
    memory_key="chat_history",
    return_messages=True,
)

In [None]:
from langchain.chains import ConversationalRetrievalChain


# Chat model that answers the questions (temperature 0.0).
llm = ChatOpenAI(temperature=0.0)

# Retriever over the persisted store, configured with search_kwargs k=3.
retriever = db.as_retriever(search_kwargs={"k": 3})

# Wire model, retriever and conversation memory into one conversational QA chain.
qa_chain = ConversationalRetrievalChain.from_llm(llm, retriever=retriever, memory=memory)

In [None]:
# First question of the conversation; totals_cb is attached as a callback for this call.
question = "How to read CSV file into dataframe?"
result = qa_chain({"question": question}, callbacks=[totals_cb])

# Render the answer text as Markdown in the cell output.
display(Markdown(result["answer"]))

In [None]:
# Ask for a code example; totals_cb is attached as a callback for this call.
question = "Give me an example of code plotting an unstacked area plot of a dataframe with alpha blending"
result = qa_chain({"question": question}, callbacks=[totals_cb])

# Render the answer text as Markdown in the cell output.
display(Markdown(result["answer"]))

In [None]:
import pandas as pd
import numpy as np

# Seed the generator so the demo data, and therefore the figure, is identical
# on every Restart & Run All.
rng = np.random.default_rng(42)

# Create a DataFrame of 10 rows x 4 columns with values drawn uniformly from [0, 1)
df = pd.DataFrame(rng.random((10, 4)), columns=['a', 'b', 'c', 'd'])

# Plot the unstacked area plot with alpha blending.
# The trailing semicolon keeps the Axes repr out of the cell output.
df.plot.area(stacked=False, alpha=0.5);

In [None]:
# Follow-up that refers to "the code above", so it relies on the chain's
# conversation memory (chat_history) to resolve what that code is.
question = "How to change the code above to use bar chart instead?"
result = qa_chain({"question": question}, callbacks=[totals_cb])

# Render the answer text as Markdown in the cell output.
display(Markdown(result["answer"]))

In [None]:
# New topic within the same conversation; totals_cb is attached as a callback for this call.
question = "How to deal with missing data in a dataframe?"
result = qa_chain({"question": question}, callbacks=[totals_cb])

# Render the answer text as Markdown in the cell output.
display(Markdown(result["answer"]))

In [None]:
# Last question of the session; totals_cb is attached as a callback for this call.
question = "Give me an example of using PyArrow to improve speed of operations in a dataframe"
result = qa_chain({"question": question}, callbacks=[totals_cb])

# Render the answer text as Markdown in the cell output.
display(Markdown(result["answer"]))

In [None]:
# Print the handler again, now that every question above has been run with it
# attached as a callback.
print(totals_cb)