In [2]:
from dotenv import load_dotenv

# Read variables from a local .env file into the process environment before
# anything else runs (the OpenAI-backed cells below presumably rely on an API
# key supplied this way — confirm against your .env).
load_dotenv()

from rich.console import Console

# Fixed-width console shared by every cell so printed output wraps consistently.
CONSOLE_WIDTH = 96
console = Console(width=CONSOLE_WIDTH)

## Semantic search

In [5]:
from langchain_community.document_loaders import PyPDFLoader

# Load the employee handbook PDF (path is relative to the notebook's working directory).
loader = PyPDFLoader("resources/acmecorp-employee-handbook.pdf")

# `data` is the list of Document objects produced by the loader
# (one per PDF page with PyPDFLoader's default settings — confirm if the mode is changed).
data = loader.load()

# Printing the whole list would flood the notebook with the full handbook text,
# so report the size and preview only the first document. Slicing (rather than
# indexing) keeps this cell from raising if the loader returned nothing.
console.print(f"Loaded {len(data)} document(s)")
console.print(data[:1])

In [6]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Break the loaded documents into overlapping chunks sized for embedding.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    add_start_index=True,
)
all_splits = text_splitter.split_documents(data)

# Show how many chunks the splitter produced.
chunk_count = len(all_splits)
console.print(chunk_count)

In [9]:
# Preview only the first few chunks: dumping every split prints the entire
# handbook again (plus overlap) and buries the rest of the notebook.
console.print(all_splits[:3])


Embedding models: see the [LangChain text-embedding integrations](https://docs.langchain.com/oss/python/integrations/text_embedding) for the available providers.

In [7]:
from langchain_openai import OpenAIEmbeddings

# Embedding model used to vectorize both the handbook chunks and the queries.
EMBEDDING_MODEL = "text-embedding-3-large"
embeddings = OpenAIEmbeddings(model=EMBEDDING_MODEL)

In [8]:
from langchain_core.vectorstores import InMemoryVectorStore

# In-process vector store backed by the embedding model configured above;
# nothing is persisted, so it must be rebuilt after a kernel restart.
vector_store = InMemoryVectorStore(embedding=embeddings)

In [10]:
# Give every chunk a deterministic id (source + position in `all_splits`).
# Without explicit ids the store assigns a fresh random id on each call, so
# re-running this cell would index every chunk a second time and later
# searches would return duplicate hits. With stable ids a re-run overwrites
# the existing entries instead.
chunk_ids = [
    f"{doc.metadata.get('source', 'document')}#{position}"
    for position, doc in enumerate(all_splits)
]

# Embeds each chunk and stores it; `ids` holds the ids that were written.
ids = vector_store.add_documents(documents=all_splits, ids=chunk_ids)

In [11]:
# Sanity-check retrieval directly against the vector store before wiring it
# into an agent.
results = vector_store.similarity_search(
    query="How many days of vacation does an employee get in their first year?"
)

# Show only the best-ranked chunk.
best_match = results[0]
console.print(best_match)

## RAG Agent

In [12]:
from langchain.tools import tool

@tool
def search_handbook(query: str) -> str:
    """Search the employee handbook for information"""
    # NOTE: `@tool` exposes the docstring above to the model as the tool
    # description, so it is kept verbatim; implementation notes live in comments.
    #
    # query: free-text question to look up in the handbook vector store.
    # Returns the text of the single best-matching chunk, or a plain-language
    # "nothing found" message when the store yields no hits.

    # Only the top hit is used, so ask the store for exactly one result.
    results = vector_store.similarity_search(query, k=1)

    # An empty or not-yet-populated store returns []; indexing [0] would raise
    # IndexError and abort the agent run, so answer with a readable message.
    if not results:
        return "No relevant information was found in the employee handbook."

    return results[0].page_content

In [13]:
from langchain.agents import create_agent

# Instructions given to the model on every run.
SYSTEM_PROMPT = "You are a helpful agent that can search the employee handbook for information."

# Agent that can call `search_handbook` to ground its answers in the handbook.
agent = create_agent(
    model="gpt-5-nano",
    tools=[search_handbook],
    system_prompt=SYSTEM_PROMPT,
)

In [14]:
from langchain.messages import HumanMessage

# Ask the agent the same question used for the manual similarity search.
user_message = HumanMessage(
    content="How many days of vacation does an employee get in their first year?"
)

# The agent takes its input as a state dict holding the list of messages.
response = agent.invoke({"messages": [user_message]})

In [15]:
from pprint import pprint

# Display the complete result returned by `agent.invoke` in the previous cell
# (presumably the full message history, including tool calls — verify against the output).
# NOTE(review): the `pprint` import in this cell is unused; rich's console does the formatting.
console.print(response)