In [2]:
from dotenv import load_dotenv
# Load environment variables from the .env file one directory above the
# notebook's working directory. The call is the cell's last expression, so its
# return value is displayed (True in the saved run).
load_dotenv(dotenv_path='../.env', verbose=True)

True

In [4]:
import os

# Sanity check: show which embeddings model name was picked up from the
# environment (prints "None" if EMBEDDINGS_MODEL is not set).
print(os.getenv('EMBEDDINGS_MODEL'))

models/text-embedding-004


In [5]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_google_genai.embeddings import GoogleGenerativeAIEmbeddings

# Read the model names with os.environ[...] rather than os.getenv(...): a
# missing variable then fails right here with a KeyError that names it, instead
# of silently passing model=None into the client constructors.
llm = ChatGoogleGenerativeAI(model=os.environ['GEMINI_MODEL'])
embeddings = GoogleGenerativeAIEmbeddings(model=os.environ['EMBEDDINGS_MODEL'])

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Smoke-test the embeddings client on a short sentence; the cell displays
# whatever embed_query returns (presumably the embedding vector — confirm).
embeddings.embed_query("Hello, how are you?")

In [6]:
from langchain_core.vectorstores import InMemoryVectorStore

# Create the vector store on top of the `embeddings` client; it starts out
# empty and is filled by the add_documents cell further down.
vector_store = InMemoryVectorStore(embeddings)

In [14]:
from langchain_community.document_loaders import PyPDFLoader

# Path is relative to the notebook's working directory.
document_path = '../example_pdfs/RBI_DEBIT_CREDIT_CARD.pdf'
pdf_loader = PyPDFLoader(document_path)

# Load the PDF. `pages` is indexed by position in a later cell and then handed
# to the text splitter (presumably one Document per PDF page — confirm against
# the PyPDFLoader docs).
pages = pdf_loader.load()

In [18]:
# Spot-check the extracted text of one entry of `pages` (hard-coded index 36);
# presumably fails with IndexError if the loaded PDF yields fewer than 37 entries.
# NOTE(review): the saved output of this cell does not read like text from
# RBI_DEBIT_CREDIT_CARD.pdf — it may be left over from a different document.
# Re-run on a fresh kernel to confirm.
pages[36].page_content

'Billy Meinke \nBilly is the Open Educ ational R esources T echnologist f or the \nOutreach College at the University of Hawai‘i at Mānoa. \nAbout the Contributors  |  xxxvii'

In [19]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
# Chunking parameters, named so they are easy to find and tune.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)

# Split the loaded pages into overlapping chunks for indexing.
all_splits = text_splitter.split_documents(pages)

In [None]:
# Add every chunk to the vector store. The return value is bound to a
# descriptive name instead of `_`: in IPython `_` is the last-output shortcut,
# and assigning to it stops IPython from updating it for the rest of the
# session. Ending the cell on an assignment still keeps the returned value
# (presumably the list of stored document IDs — confirm) from being echoed.
document_ids = vector_store.add_documents(documents=all_splits)

In [22]:
from langchain import hub
# Fetch the "rlm/rag-prompt" template from the LangChain hub; `generate` later
# invokes it with "question" and "context" values.
prompt = hub.pull("rlm/rag-prompt")

In [23]:
# Display the pulled prompt object to inspect its template and input variables.
prompt

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})])

In [24]:
from typing_extensions import TypedDict, List
from langchain_core.documents import Document

class State(TypedDict):
    """State schema given to StateGraph and shared by the `retrieve` and `generate` nodes."""
    # The user's question; read by both `retrieve` and `generate`.
    question: str
    # Documents written by `retrieve`; `generate` joins their page_content.
    context: List[Document]
    # The model's reply content, written by `generate`.
    answer: str

In [25]:
from langgraph.graph import START, StateGraph

def retrieve(state: State):
    """Look up documents similar to the question in `vector_store`.

    Reads ``state["question"]`` and returns a partial state update of the
    form ``{"context": <search results>}``.
    """
    return {"context": vector_store.similarity_search(state["question"])}

def generate(state: State):
    """Answer the question from the retrieved context.

    Joins the ``page_content`` of every document in ``state["context"]`` with
    blank lines, fills the module-level `prompt` with that text and the
    question, sends the result to `llm`, and returns ``{"answer": <content>}``.
    """
    context_text = "\n\n".join([document.page_content for document in state["context"]])
    prompt_value = prompt.invoke(
        {"question": state["question"], "context": context_text}
    )
    return {"answer": llm.invoke(prompt_value).content}

In [26]:
# Assemble the two-step pipeline: START -> retrieve -> generate.
graph_builder = StateGraph(State)
# add_sequence registers both functions as nodes (named after the functions)
# and chains them in the given order.
graph_builder.add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve")

graph = graph_builder.compile()

In [None]:
# Run the compiled graph end to end on one question and print the answer text.
question = "What is a debit and credit card?"
response = graph.invoke({"question": question})
print(response["answer"])