In [2]:
from langchain_community.llms import Ollama
from langchain_community.document_loaders import PyPDFLoader
from langchain.prompts import PromptTemplate
from langchain_community.vectorstores import DocArrayInMemorySearch
from langchain_community.embeddings import OllamaEmbeddings
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

In [5]:
#1 加载 PDF 文件
pdf_path = "ETX-AI.pdf"
loader = PyPDFLoader(pdf_path)
docs = loader.load()

print(docs[0].page_content[0:100])
print(docs[0].metadata)
# 打印提取的文档内容（仅前几页的内容）
# for doc in documents[:7]:
#     print(doc.page_content)


{'source': 'ETX-AI.pdf', 'page': 0}


In [6]:
# 1. Create the model
llm = Ollama(model='llama3.2:latest')
embeddings = OllamaEmbeddings(model='znbang/bge:small-en-v1.5-f32')

# 2. Load the PDF file and create a retriever to be used for providing context
# loader = PyPDFLoader(argv[1])
# pages = loader.load_and_split()
store = DocArrayInMemorySearch.from_documents(docs, embedding=embeddings)
retriever = store.as_retriever()

#



In [7]:
 #3. Create the prompt template
template = """
Answer the question based only on the context provided.

Context: {context}

Question: {question}
"""

prompt = PromptTemplate.from_template(template)

def format_docs(docs):
  return "\n\n".join(doc.page_content for doc in docs)

# 4. Build the chain of operations
chain = (
  {
    'context': retriever | format_docs,
    'question': RunnablePassthrough(),
  }
  | prompt
  | llm
  | StrOutputParser()
)

question = "what is about ETX AI pilot?"
chain.invoke({'question': question})
# 5. Start asking questions and getting answers in a loop
# while True:
#   question = input('What do you want to learn from the document?\n')
#   print()
#   print(chain.invoke({'question': question}))
#   print()

'To confirm if your registration was received, you should check your inbox for confirmation emails throughout the registration process. If you do not receive these confirmation emails, please contact Katie Day (kday@redhat.com) for status updates.'