# 🤖 Agentic Workflow: PDF Knowledge Assistant

This notebook demonstrates how to build an agent that can reason through a task involving document retrieval and summarization.

In [None]:
# Setup
import os
from dotenv import load_dotenv
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
from langchain.agents import initialize_agent, Tool
from langchain.llms import OpenAI
from langchain.chains.question_answering import load_qa_chain

# Load variables via python-dotenv, then read the OpenAI key from the
# process environment (the value is None when the variable is not set).
load_dotenv()
openai_key = os.environ.get("OPENAI_API_KEY")

In [None]:
# Step 1: Load and split PDF
# Load the PDF without splitting, then chunk exactly once with the splitter
# configured below. Calling `load_and_split()` with no arguments would first
# apply the loader's own default splitter, so the text would be split twice
# with two different chunk-size/overlap settings.
loader = PyPDFLoader("sample.pdf")
pages = loader.load()
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
docs = splitter.split_documents(pages)

In [None]:
# Step 2: Create vector store
# Build a FAISS store from the chunked documents using OpenAI embeddings.
embeddings = OpenAIEmbeddings(openai_api_key=openai_key)
db = FAISS.from_documents(documents=docs, embedding=embeddings)

In [None]:
# Step 3: Define a QA chain
# The LLM is created with temperature=0 and shared by the QA chain below
# (and by the agent later in the notebook).
llm = OpenAI(temperature=0, openai_api_key=openai_key)
qa_chain = load_qa_chain(llm=llm, chain_type="stuff")

def search_and_summarize(query):
    """Answer ``query`` using the PDF chunks most similar to it.

    Runs a similarity search for ``query`` on the module-level ``db`` vector
    store, then hands the matching documents and the query to the
    module-level ``qa_chain``.

    Args:
        query: The question text to look up and answer.

    Returns:
        Whatever ``qa_chain.run`` returns for the retrieved documents.
    """
    relevant_chunks = db.similarity_search(query)
    answer = qa_chain.run(input_documents=relevant_chunks, question=query)
    return answer

In [None]:
# Step 4: Register the search tool and initialize agent
# The agent gets a single tool, which wraps search_and_summarize.
tools = [
    Tool(
        name="PDFSearch",
        func=search_and_summarize,
        description="Useful for answering questions about the PDF.",
    ),
]
agent = initialize_agent(
    tools,
    llm,
    agent="zero-shot-react-description",
    verbose=True,
)

# Try it out
agent.run("Summarize the key findings from this document.")