In [1]:
import os
from dotenv import load_dotenv
# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

# Read the OpenAI key once and fail fast with a clear message if it is missing,
# instead of passing None to the OpenAI clients created in later cells.
openai_api_key = os.getenv('OPENAI_API_KEY')
if not openai_api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; define it in the environment or in a .env file."
    )


In [2]:
from langchain_community.document_loaders import PyPDFLoader

# Raw string literal: the Windows path contains backslashes (\M, \P, \B) that
# are invalid escape sequences in a normal string and trigger a SyntaxWarning
# on recent Python versions. The resulting path value is unchanged.
# NOTE(review): this is an absolute, machine-specific path; consider making it
# configurable before sharing the notebook.
loader = PyPDFLoader(r"D:\MIT ADT\Projects\Build Fast with AI\Bitcoin.pdf")

# Read the PDF into a list of documents for the splitting step.
pdf_data = loader.load()

In [3]:
from langchain.text_splitter import CharacterTextSplitter

# Configure a character-based splitter that cuts the loaded PDF documents into
# overlapping chunks.
text_splitter = CharacterTextSplitter(
    length_function=len,  # chunk length is measured with len()
    chunk_overlap=150,    # overlap allowed between neighbouring chunks
    chunk_size=1000,      # maximum chunk size
    separator="\n",       # split on newlines
)

# Apply the splitter to the loaded PDF documents.
docs = text_splitter.split_documents(pdf_data)

In [4]:
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS

# Embeddings client, authenticated with the key read from the environment.
embeddings = OpenAIEmbeddings(api_key=openai_api_key)

In [5]:
# Build the FAISS vector store from the document chunks using the embeddings client.
db = FAISS.from_documents(documents=docs, embedding=embeddings)

In [6]:
# Expose the vector store as a retriever; search is configured with k=3.
retriever = db.as_retriever(search_kwargs=dict(k=3))

In [7]:
from langchain.chains import RetrievalQA
# Import PromptTemplate from its home package; the root-level
# `from langchain import PromptTemplate` re-export is deprecated.
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI

# Chat model used to generate answers, authenticated with the OpenAI key.
model = ChatOpenAI(api_key=openai_api_key)

In [8]:
# Prompt text for the question-answering step. {context} and {question} are
# placeholders to be substituted when the template is formatted.
custom_prompt_template = (
    "Use the following pieces of information to answer the user's question.\n"
    "If you don't know the answer, just say that you don't know, don't try to make up an answer.\n"
    "\n"
    "Context: {context}\n"
    "Question: {question}\n"
    "\n"
    "Only return the helpful answer below and nothing else.\n"
    "Answer:\n"
)

In [9]:
# Wrap the template text in a PromptTemplate, declaring the two placeholders
# it expects.
prompt = PromptTemplate(
    input_variables=['context', 'question'],
    template=custom_prompt_template,
)

In [10]:
# Assemble the retrieval QA chain from the chat model, the retriever and the
# custom prompt.
qa = RetrievalQA.from_chain_type(
    llm=model,
    retriever=retriever,
    chain_type="stuff",
    chain_type_kwargs={"prompt": prompt},
    return_source_documents=True,  # return the source documents alongside the answer
)

In [11]:
question = "What is the main topic of the document?"

# Run the query and retrieve both the answer and source documents.
# Use `invoke` rather than calling the chain object directly: the direct-call
# form `qa({...})` is deprecated and emits a deprecation warning.
result = qa.invoke({"query": question})

  result = qa({"query": question})


In [12]:
# Access the answer and source documents separately
answer = result['result']
source_documents = result['source_documents']

# Display the answer and source documents
print("Answer:", answer)
print("Source Documents:", source_documents)

Answer: The main topic of the document is Bitcoin and its relevance for international financial institutions.
Source Documents: [Document(id='adc69223-bb18-4ef4-af0f-c88cb23de90d', metadata={'source': 'D:\\MIT ADT\\Projects\\Build Fast with AI\\Bitcoin.pdf', 'page': 25}, page_content='ency-will-collapse.html) from the original on 8 July 2022. Retrieved 22 November 2023.\n154. Davradakis, Emmanouil; Santos, Ricardo (2019). Blockchain, FinTechs and their relevance\nfor international financial institutions (https://data.europa.eu/doi/10.2867/11329). European\nInvestment Bank. doi:10.2867/11329 (https://doi.org/10.2867%2F11329). ISBN 978-92-861-\n4184-3. Archived (https://web.archive.org/web/20240421113801/https://op.europa.eu/en/pu\nblication-detail/-/publication/ae472145-237a-11e9-8d04-01aa75ed71a1/language-en) from\nthe original on 21 April 2024. Retrieved 24 November 2023.\nNakamoto, Satoshi (31 October 2008). "Bitcoin: A Peer-to-Peer Electronic Cash System" (ht\ntps://web.archive.org/