Document loading

In [None]:
from langchain_community.document_loaders import PyPDFLoader
# Path handed to the PDF loader; kept in one place so it is easy to change.
PDF_PATH = 'pdf'

# Build the loader for that path, then read the file into `doc`
# (`doc` is what the splitting step consumes).
loader = PyPDFLoader(PDF_PATH)
doc = loader.load()

Splitting the documents into chunks (this helps improve retrieval quality and speed in a naive RAG workflow)

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Chunking parameters handed to the splitter.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 100
# Candidate separators, in the order they are given to the splitter:
# blank line, newline, space, empty string.
SEPARATORS = ['\n\n', '\n', ' ', '']

splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
    separators=SEPARATORS,
)

# `data` holds the chunks produced from the loaded documents in `doc`.
data = splitter.split_documents(doc)

Instantiating a retriever

In [None]:
from langchain_openai import OpenAIEmbeddings
# `langchain.chroma` does not exist; the Chroma vector store is exposed by
# `langchain_community.vectorstores` (a package this notebook already uses).
from langchain_community.vectorstores import Chroma
import os
from dotenv import load_dotenv

# Load variables from a local .env file so the API key can be read from the environment.
load_dotenv()

# data storage for retrieval
embeddings = OpenAIEmbeddings(
    model='text-embedding-3-small',
    api_key=os.getenv('openai_api_key'),
)
vector_store = Chroma.from_documents(
    documents=data,
    embedding=embeddings,
    # Chroma's keyword for the on-disk location is `persist_directory`.
    persist_directory='path/directory',
)

# retriever: similarity search returning the 2 closest chunks per query
retriever = vector_store.as_retriever(
    search_type='similarity',
    search_kwargs={'k': 2},
)

Defining the prompt

In [None]:
from langchain_core.prompts import ChatPromptTemplate

# Build the chat prompt from a single template string. `{context}` and
# `{question}` are the two variables the chain must supply.
prompt = ChatPromptTemplate.from_template(
    """
you are a customer care representative, answer the questions using the below context:
context: {context}
question: {question}
"""
)

Instantiating model

In [None]:
import os

from langchain_openai import ChatOpenAI

# Chat model used to answer the question. The key is read from the environment
# (same variable as the embeddings cell) rather than written into the notebook;
# temperature 0 is kept from the original configuration.
llm = ChatOpenAI(
    model='gpt-4o-mini',
    api_key=os.getenv('openai_api_key'),
    temperature=0,
)


Creating the chain to link the retriever, the prompt template and the LLM sequentially

In [None]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

def format_docs(docs):
    """Join the `page_content` of each retrieved document into one context string.

    Without this step the prompt's `{context}` slot would receive the string
    form of the document objects instead of their text.
    """
    return '\n\n'.join(d.page_content for d in docs)


# The chain takes the raw question string as input:
#   - 'context'  : the question goes to the retriever, and the hits are flattened to text
#   - 'question' : the question is forwarded unchanged (RunnablePassthrough must be an instance)
# The resulting dict fills the prompt, which is sent to the LLM and parsed to a plain string.
chain = (
    {'context': retriever | format_docs, 'question': RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# Invoke with the question string itself so the retriever receives a query, not a dict.
response = chain.invoke('how do i get a refund')
print(response)