In [29]:
from langgraph.graph import START, END, StateGraph
from langgraph.graph.message import add_messages, MessagesState
from langchain_openai import ChatOpenAI
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.tools import tool
from typing import TypedDict, Annotated
from IPython.display import Image, display
from dotenv import load_dotenv
import os

load_dotenv()

from langchain_openai import ChatOpenAI



In [49]:
from langgraph.graph import START, StateGraph
from langgraph.prebuilt import tools_condition, ToolNode
from langchain_core.documents import Document
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import RetrievalQA
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.runnables import RunnablePassthrough
from langchain_core.messages import HumanMessage
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from IPython.display import Image, display
from langchain import hub

prompt = hub.pull("rlm/rag-prompt")

class OverallState(MessagesState):
    documents: list[Document]
    
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1024,
    chunk_overlap=64
)

embeddings = OpenAIEmbeddings(
    api_key=os.getenv("OPENAI_API_KEY"),
    model="text-embedding-3-large"
)

llm = ChatOpenAI(
    api_key=os.getenv("OPENAI_API_KEY"),
    model="gpt-4o-mini"
)

def load_document(state: OverallState):
    loader = PyPDFLoader("data/AML_IEEE_ACCESS_2024.pdf", extract_images=True)
    pages = loader.load()
    return {"documents": pages}

def split_text(state: OverallState):
    texts = text_splitter.split_documents(state["documents"])
    return {"documents": texts}

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)


def ask_question(state: OverallState):
    db = Chroma.from_documents(state["documents"], embeddings, persist_directory="db")
    retriever = db.as_retriever()
    rag_chain = {"context": retriever | format_docs, "question": RunnablePassthrough()} | prompt | llm
    message = rag_chain.invoke(state["messages"][0].content)
    return {"messages": message.content}


builder = StateGraph(OverallState)
builder.add_node("Load Documents", load_document)
builder.add_node("Split Text", split_text)
builder.add_node("Ask Question", ask_question)
builder.add_edge(START, "Load Documents")
builder.add_edge("Load Documents", "Split Text")
builder.add_edge("Split Text", "Ask Question")
builder.add_edge("Ask Question", END)
graph = builder.compile()
print(graph.invoke({"messages": ["Who are you"]})["messages"])
    
    
    

[HumanMessage(content='Who are you', additional_kwargs={}, response_metadata={}, id='85b84add-839a-4515-832a-87ba99bc16ff'), HumanMessage(content='I am an assistant for question-answering tasks. I utilize retrieved context to provide concise responses to inquiries. If you have any specific questions, feel free to ask!', additional_kwargs={}, response_metadata={}, id='840a6463-5ecd-4484-a636-f7a078dcdcf8')]
