In [None]:
import os
from dotenv import load_dotenv, find_dotenv
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.chains.question_answering import load_qa_chain
from langchain.vectorstores import FAISS
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.text_splitter import CharacterTextSplitter

In [20]:
# Pull settings from the nearest .env file (if one exists) into the environment.
_ = load_dotenv(find_dotenv())

# The chat model below is configured through OPENAI_API_KEY / OPENAI_API_BASE,
# while the key itself is stored under GROQ_API_KEY. Check both settings up
# front: assigning None into os.environ fails with an opaque
# "TypeError: str expected, not NoneType".
_missing = [name for name in ("GROQ_API_KEY", "OPENAI_API_BASE") if not os.getenv(name)]
if _missing:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing)
        + ". Define them in your .env file or shell before running this cell."
    )

# Expose the Groq key under the variable name the OpenAI-compatible client uses.
# OPENAI_API_BASE is already in the environment (verified above), so it needs
# no re-assignment.
os.environ["OPENAI_API_KEY"] = os.environ["GROQ_API_KEY"]

# Load the PDF (path is relative to the notebook's working directory);
# the cell displays how many page documents were read.
loader = PyPDFLoader("pdf-test.pdf")
pages = loader.load()
len(pages)

1

In [21]:
# Break the loaded pages into overlapping character chunks prior to indexing.
text_splitter = CharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=1000,
)
docs = text_splitter.split_documents(pages)

# Embedding model (Hugging Face sentence-transformers) applied to every chunk.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

# Index the chunks with FAISS, then expose the index through a retriever
# that the question-answering cell queries.
db = FAISS.from_documents(docs, embeddings)
retriever = db.as_retriever()

In [22]:
# Chat model plus a "stuff" question-answering chain (per the LangChain docs,
# this chain type places all supplied documents into a single prompt).
llm = ChatOpenAI(model="llama3-8b-8192")
qa_chain = load_qa_chain(llm, chain_type="stuff")

# Interactive question loop: runs until the user types 'exit'.
# NOTE: this cell blocks on input(), so it requires an interactive kernel.
while True:
    try:
        # Trim surrounding whitespace so inputs like "exit " are recognised.
        query = input("Ask a question (or type 'exit' to quit): ").strip()
    except (EOFError, KeyboardInterrupt):
        # Input stream closed or the user interrupted: end the loop
        # without leaving a traceback in the cell output.
        break
    if not query:
        # Nothing was typed; prompt again instead of querying the model.
        continue
    if query.lower() == "exit":
        break
    # Fetch the chunks most relevant to the question, then let the chain
    # answer using only those chunks as context.
    relevant_docs = retriever.get_relevant_documents(query)
    answer = qa_chain.run(input_documents=relevant_docs, question=query)
    print(f"Answer: {answer}\n")

Answer: The heading of this file is "PDF Test File".

Answer: This appears to be a PDF file that is an introduction or disclaimer for a website related to the Yukon Department of Education in Canada.

