In [None]:
import os
from dotenv import load_dotenv

# Pull variables from a local .env file (if any) into the process environment.
load_dotenv()

# The project stores the key as OPEN_AI_API_KEY; also accept the standard
# OPENAI_API_KEY spelling so either configuration works.
OPENAI_API_KEY = os.environ.get('OPEN_AI_API_KEY') or os.environ.get('OPENAI_API_KEY')

# The OpenAI wrappers later in this notebook are built without an explicit key
# and resolve it from the OPENAI_API_KEY environment variable. Export the key
# under that name; setdefault keeps any value that is already configured.
if OPENAI_API_KEY:
    os.environ.setdefault('OPENAI_API_KEY', OPENAI_API_KEY)

In [None]:
from langchain.document_loaders import PyMuPDFLoader

# Location of the source PDF, relative to the notebook's working directory.
PDF_PATH = "../data/williams.pdf"

# Read the PDF into LangChain documents; `data` feeds the splitter cell below.
loader = PyMuPDFLoader(PDF_PATH)
data = loader.load()

# Preview the first loaded document as a sanity check.
data[0]

In [None]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA

# Split the loaded documents into chunks of roughly 1000 characters with no overlap.
# NOTE(review): CharacterTextSplitter splits on a single separator (default
# presumably "\n\n"); chunks may exceed chunk_size if the PDF text lacks it — confirm.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(data)

# Hand over the key loaded in the first cell explicitly, instead of relying on
# an OPENAI_API_KEY environment variable that the .env file may not define.
# A None value presumably falls back to the environment lookup — confirm.
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

# Embed every chunk and index it in a Chroma vector store.
docsearch = Chroma.from_documents(texts, embeddings)

In [None]:
# Baseline retrieval-QA chain using the library's default "stuff" prompt.
# A later cell rebinds `qa` to a chain with a custom prompt.
# The key loaded in the first cell is passed explicitly so the LLM does not
# depend on an OPENAI_API_KEY environment variable being set.
qa = RetrievalQA.from_chain_type(
    llm=OpenAI(openai_api_key=OPENAI_API_KEY),
    chain_type="stuff",
    retriever=docsearch.as_retriever(),
)

In [None]:
from langchain.prompts import PromptTemplate
# Prompt text for the "stuff" chain. It must keep the two placeholders
# {context} (retrieved chunks) and {question} (the user's query).
# The text is whitespace-sensitive runtime data; do not re-wrap it.
prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. Please provide a comprehensive summary of the provided legal document, highlighting the following key elements:
1. Identification of the parties involved, including their roles and responsibilities.
2. Explanation of the legal structures and frameworks utilized within the document.
3. Description of the financial accounts, including their types, purposes, and any specific conditions.
4. Mention any additional relevant data, such as important dates, clauses, or agreements.

Ensure that the summary is detailed and captures the essential aspects of the legal document.


Let's think about this step by step.

{context}

Question: {question}
Answer:"""

# Wrap the raw text in a PromptTemplate, declaring the variables it expects.
PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

In [None]:
# Route the custom summary prompt into the underlying "stuff" chain.
chain_type_kwargs = {"prompt": PROMPT}

# Rebuild the retrieval-QA chain with the custom prompt. The key loaded in the
# first cell is passed explicitly so the LLM does not depend on an
# OPENAI_API_KEY environment variable being set.
qa = RetrievalQA.from_chain_type(
    llm=OpenAI(openai_api_key=OPENAI_API_KEY),
    chain_type="stuff",
    retriever=docsearch.as_retriever(),
    chain_type_kwargs=chain_type_kwargs,
)


In [None]:
# Ask a broad question; the text is substituted into the prompt's {question} slot
# when `qa` is the custom-prompt chain built in the previous cell.
query = "What is this document about?"
# Bare last expression so the notebook displays the value returned by the chain.
qa.run(query)