# This file creates a chatbot for a given PDF

In [None]:
from langchain.document_loaders import PyPDFLoader

# Location of the PDF to index, relative to the notebook's working directory.
pdf_path = "../data/williams.pdf"
loader = PyPDFLoader(pdf_path)

# load_and_split() returns a list of Document chunks, so no separate
# text-splitting step is needed afterwards.
data = loader.load_and_split()
print(f'You have {len(data)} document(s) in your data.')

# Fail with a clear message instead of an IndexError (or an obscure failure
# later in the embedding step) when no text could be extracted.
if not data:
    raise ValueError(f"No text could be extracted from {pdf_path!r}. Check that the file exists and contains selectable text.")

# Report the size of everything that was loaded, not just the first chunk.
# NOTE(review): if the splitter overlaps chunks, this total slightly
# overcounts the raw document length -- confirm if an exact figure matters.
total_chars = sum(len(doc.page_content) for doc in data)
print(f'There are {total_chars} characters in your document')

In [None]:
import os
from dotenv import load_dotenv

# Load variables from a .env file into the environment, then read the key.
load_dotenv()
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')

# Stop early when the key is missing or empty.
if OPENAI_API_KEY is None or OPENAI_API_KEY == "":
    raise ValueError("OPENAI_API_KEY not found in the environment variables. Check your .env file and make sure the variable name matches.")

# Write the key back into the process environment.
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY

In [None]:
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma

# Embedding model used to vectorise each document chunk.
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

# from_documents is a classmethod constructor, so call it on the class.
# Calling it on Chroma() first built a throwaway default store for nothing.
# NOTE(review): persist_directory="." writes the index files into the current
# working directory; a dedicated subdirectory may be preferable -- confirm.
vectoredb = Chroma.from_documents(
    data,
    embedding=embeddings,
    persist_directory=".",
)

# Flush the index to disk.
vectoredb.persist()

In [None]:
from langchain.llms import OpenAI
from langchain.chains import ChatVectorDBChain

# Build the LLM with temperature 0, then wire it to the vector store
# created earlier to form the question-answering chain.
llm = OpenAI(temperature=0)
qa = ChatVectorDBChain.from_llm(llm, vectoredb)

In [None]:
# No prior conversation turns: this is the first question put to the chain.
chat_history = []

# One multi-part prompt, split across lines for readability only; the
# adjacent literals concatenate to the same single string.
query = (
    "Who is the document about? "
    "What are the main points? "
    "Describe the summary of their current assets and planning considerations and recommendations. "
    "Create Tables of the legal entities described and any other relevant plottable tables"
)

# Invoke the chain with the question and the (empty) history.
results = qa(dict(question=query, chat_history=chat_history))

In [None]:
# Look up the "answer" entry of the chain's result; as the last expression in
# a notebook cell its value is displayed as the cell output.
results["answer"]