In [None]:
%pip install -q langchain langgraph langchain-docling langchain-qdrant langchain-text-splitters langgchain-ollama

In [None]:
import warnings
warnings.filterwarnings("ignore")

In [None]:
from langchain_ollama import ChatOllama

# Reasoning model. Generation stops at the closing </think> tag, so the output
# ends with the model's thinking section — presumably this feeds
# RAGState["thinking"]; confirm against the node that calls it.
reasoning_llm = ChatOllama(
    model="deepseek-r1:7b",
    stop=["</think>"]
)

# Answer model. temperature=0 keeps the final answer as deterministic as the
# backend allows.
answer_llm = ChatOllama(
    model="exaone3.5",
    temperature=0,
)

In [None]:
from typing import Annotated, List, TypedDict, Literal
from langgraph.graph.message import add_messages
from langchain_core.documents import Document

# RAG state definition
class RAGState(TypedDict):
    """State dictionary passed through the RAG system."""
    query: str # user query (original note read "user-defined"; presumably the user's question)
    thinking: str # reasoning trace produced by reasoning_llm
    document: List[Document] # retrieved documents
    answer: str # final answer
    messages: Annotated[List, add_messages] # message list; annotated with LangGraph's add_messages
    mode: str # mode label; classify_mode copies it into the message's additional_kwargs

In [None]:
from langchain_docling import DoclingLoader
from langchain_docling.loader import ExportType

# Source PDF: the Docling Technical Report on arXiv. arXiv identifiers have the
# form YYMM.NNNNN, so the month part must be 01-12.
FILE_PATH = "https://arxiv.org/pdf/2408.09869"

# Export the converted document as Markdown so it can later be split on
# Markdown headings.
loader = DoclingLoader(
    file_path=FILE_PATH,
    export_type=ExportType.MARKDOWN
)

docs = loader.load()

In [None]:
from langchain_text_splitters import MarkdownHeaderTextSplitter

# Split on all six Markdown heading levels ("#" .. "######"), labelled
# Header_1 .. Header_6 respectively.
splitter = MarkdownHeaderTextSplitter(
    headers_to_split_on=[
        ("#" * level, f"Header_{level}") for level in range(1, 7)
    ]
)

# Flatten the per-document chunk lists into a single list of chunks.
splits = [
    chunk
    for source_doc in docs
    for chunk in splitter.split_text(source_doc.page_content)
]

# Quick look at the first three chunks.
for d in splits[:3]:
    print(f"- {d.page_content}...")
print("...")

In [None]:
from IPython.display import Markdown, display

# Render one sample chunk as Markdown to eyeball the split quality.
# The index is an arbitrary sample; it is clamped to the last chunk so the cell
# still runs when the document yields fewer chunks than expected.
PREVIEW_INDEX = 12

if splits:
    display(Markdown(splits[min(PREVIEW_INDEX, len(splits) - 1)].page_content))
else:
    print("No chunks to preview: the splitter produced an empty list.")

In [None]:
from langchain_ollama import OllamaEmbeddings

# Embedding model (bge-m3 via Ollama) used to vectorise the chunks.
embeddings = OllamaEmbeddings(model="bge-m3:latest")

In [1]:
from langchain_qdrant import QdrantVectorStore, RetrievalMode

# Embed every chunk and index it in an in-memory Qdrant collection using
# dense-vector retrieval.
vector_store = QdrantVectorStore.from_documents(
    documents=splits,
    embedding=embeddings,
    collection_name="rag_collection_0228",
    location=":memory:",
    retrieval_mode=RetrievalMode.DENSE,
)

# First-stage retriever: fetch 10 candidates per query.
retriever = vector_store.as_retriever(search_kwargs=dict(k=10))

SyntaxError: expected argument value expression (2818145918.py, line 8)

In [2]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import CrossEncoderReranker
from langchain_community.cross_encoders import HuggingFaceCrossEncoder

# Second stage: re-score the retriever's candidates with a cross-encoder and
# keep the 5 best.
model = HuggingFaceCrossEncoder(model_name="BAAI/bge-reranker-base")
compressor = CrossEncoderReranker(model=model, top_n=5)

# Retriever that runs the base retriever, then the reranking compressor.
contextual_compressor = ContextualCompressionRetriever(
    base_retriever=retriever,
    base_compressor=compressor,
)

SyntaxError: invalid syntax (2754928190.py, line 1)

In [None]:
from langchain_core.messages import HumanMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate

from langgraph.graph import START, StateGraph, END

def classify_mode(state: RAGState) -> dict:
    """Wrap the user's query in a ``HumanMessage`` tagged with the current mode.

    Despite the name, no classification happens here: the mode is read from
    the state as-is and attached to the message.

    Args:
        state: Current RAG state; the ``query`` and ``mode`` keys are read.

    Returns:
        A dict with a single ``messages`` key holding a one-element list: a
        ``HumanMessage`` whose content is the query and whose
        ``additional_kwargs`` carries ``{"mode": <mode>}``. Presumably this is
        consumed as a partial state update by the graph — confirm where the
        function is registered.
    """
    tagged_query = HumanMessage(
        content=state["query"],
        additional_kwargs={"mode": state["mode"]},
    )
    return {"messages": [tagged_query]}
