## **RAG 정복하기**

In [None]:
# !pip install -q langchain langchain-openai langchain-ollama langchain-community langchain-chroma langchain-text-splitters tiktoken huggingface_hub sentence_transformers pypdf grandalf

### **RAG 구축하기 – 기본적인 QA 체인 구성**

**[필요한 라이브러리 호출 및 API키 설정]**

In [None]:
import os
# Expose the OpenAI API key to the client libraries through the process environment.
# The value below is a placeholder: substitute a real key before running the
# OpenAI-backed cells, and never commit a real key with the notebook.
os.environ.update({"OPENAI_API_KEY": "YOUR_OPENAI_API_KEY"})

**[문서 로드/분할 및 벡터 임베딩]**

In [None]:
from langchain.document_loaders import PyPDFLoader
from langchain.vectorstores import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

# Load the Constitution PDF.
loader = PyPDFLoader(file_path=r"../data/대한민국헌법(헌법)(제00010호)(19880225).pdf")
# NOTE(review): load_and_split() presumably applies the loader's default splitter
# already, so the documents are split a second time below — confirm whether
# loader.load() was intended.
pages = loader.load_and_split()

# Split the loaded documents into chunks of size 1000 with an overlap of 100.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=100,
    chunk_size=1000,
)
docs = text_splitter.split_documents(documents=pages)

# Embed the chunks with the OpenAI embedding model and store them in Chroma,
# then expose the store through the retriever interface.
vectorstore = Chroma.from_documents(
    documents=docs,
    embedding=OpenAIEmbeddings(model="text-embedding-3-small"),
)
retriever = vectorstore.as_retriever()

**[프롬프트와 모델 선언]**

In [None]:
# Declare the chat model (gpt-4o-mini).
from langchain import hub
llm = ChatOpenAI(model="gpt-4o-mini")

# Pull the "rlm/rag-prompt" RAG prompt from LangChain Hub.
prompt = hub.pull("rlm/rag-prompt")

def format_docs(docs):
    """Merge retrieved documents into a single context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The page contents, in input order, separated by one blank line.
    """
    contents = [doc.page_content for doc in docs]
    return "\n\n".join(contents)

In [None]:
# Display the pulled prompt object as the cell output.
prompt

In [None]:
# Display the message templates contained in the prompt.
prompt.messages

**[Chain 구축]**

In [None]:
# Basic RAG chain: retrieve -> format context -> fill prompt -> LLM -> plain string.
# The retriever output (a list of Document objects) is piped through format_docs so
# that the prompt's "context" slot receives the page contents as plain text instead
# of the repr of the Document list. The raw user question is passed through
# unchanged as "question".
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [None]:
# Ask the chain a question about the duties of National Assembly members
# and print the string answer.
answer = rag_chain.invoke("국회의원의 의무는 뭐야?")
print(answer)

- Chain 구조 출력

In [None]:
# Print the chain's structure as an ASCII graph.
# NOTE(review): presumably relies on the `grandalf` package listed in the pip install cell — confirm.
rag_chain.get_graph().print_ascii()

### **RAG 구축하기 – Memory**

**[문서 로드-분할-벡터 저장-Retriever 생성]**

In [None]:
from langchain import hub
from langchain.document_loaders import PyPDFLoader
from langchain.vectorstores import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables.history import BaseChatMessageHistory, RunnableWithMessageHistory

# Load the Constitution PDF.
loader = PyPDFLoader(file_path=r"../data/대한민국헌법(헌법)(제00010호)(19880225).pdf")
pages = loader.load_and_split()

# Split into chunks of size 1000 with no overlap; the resulting Document
# objects are kept in `docs`.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=0,
    chunk_size=1000,
)
docs = text_splitter.split_documents(documents=pages)

# Build the Chroma vector store from the chunks (OpenAI embeddings) and
# create the retriever on top of it.
vectorstore = Chroma.from_documents(
    documents=docs,
    embedding=OpenAIEmbeddings(model="text-embedding-3-small"),
)
retriever = vectorstore.as_retriever()

**[채팅 히스토리와 사용자 질문 통합]**

In [None]:
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder

# System instruction for the question-rewriting step: turn a follow-up question
# that depends on the chat history into a standalone question, without answering it.
contextualize_q_system_prompt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, formulate a standalone question "
    "which can be understood without the chat history. Do NOT answer the question, "
    "just reformulate it if needed and otherwise return it as is."
)

# Chat prompt for question rewriting: the system instruction, then the prior
# conversation (filled from "chat_history"), then the latest user input.
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)
# Build a history-aware retriever from the LLM, the base retriever and the
# rewriting prompt.
history_aware_retriever = create_history_aware_retriever(llm, retriever, contextualize_q_prompt)

In [None]:
from langchain_core.messages import AIMessage, HumanMessage

# Rebuild the question-rewriting prompt (same definition as in the previous cell)
# so that this demo cell can be run on its own.
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

# Sample history: one earlier question/answer pair about the presidential term.
chat_history = [
    HumanMessage(content='대통령의 임기는 몇년이야?'),
    AIMessage(content='대통령의 임기는 5년입니다.')
]

# Render the prompt with a follow-up question and the sample history; the
# rendered result is shown as the cell output.
contextualize_q_prompt.invoke({"input":"국회의원은?", "chat_history" : chat_history})

In [None]:
# Build the history-aware retriever and run it on a follow-up question together
# with the sample chat history.
history_aware_retriever = create_history_aware_retriever(llm, retriever, contextualize_q_prompt)
result = history_aware_retriever.invoke({"input":"국회의원은?", "chat_history" : chat_history})

# Print a 250-character preview of every retrieved chunk, numbered from 1.
for i, chunk in enumerate(result):
    print(f"{i+1}번째 유사 청크")
    print(chunk.page_content[:250])
    print("-"*100)

**[RAG 체인 구축]**

In [None]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

# System instruction for the answering step. The "{context}" placeholder on the
# last line is where the retrieved document text is inserted.
qa_system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, just say that you don't know. "
    "Use three sentences maximum and keep the answer concise."
    "\n"
    "{context}"
)
# Answering prompt: the system instruction (with the retrieved context), then the
# prior conversation (filled from "chat_history"), then the latest user input.
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", qa_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)


# Chain that feeds the retrieved documents into qa_prompt and calls the LLM.
question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)

# Full RAG chain: history-aware retrieval followed by the answering chain.
rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)

**[RAG 체인 사용 방법 및 채팅 히스토리 기록]**

In [None]:
from langchain_core.messages import AIMessage, HumanMessage

# Running list of messages that carries the conversation from one turn to the next.
chat_history = []

question = "대통령의 임기는 몇년이야?"
# First turn: run the RAG chain with an empty history.
ai_msg_1 = rag_chain.invoke({"input": question, "chat_history": chat_history})
# Record the first turn. The answer is a plain string, so it is wrapped in
# AIMessage: a bare string in a message list is coerced to a human message,
# which would mislabel the model's reply as something the user said.
chat_history.extend(
    [HumanMessage(content=question), AIMessage(content=ai_msg_1["answer"])]
)

second_question = "국회의원은?"
# Second turn: the stored first question/answer pair is supplied as chat_history,
# so the follow-up can be interpreted in context.
ai_msg_2 = rag_chain.invoke({"input": second_question, "chat_history": chat_history})

print(ai_msg_2["answer"])

**[채팅 세션별 기록 자동 저장 RAG 체인 구축]**

In [None]:
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

# In-memory registry of chat histories, keyed by session id.
store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the chat history registered for ``session_id``.

    A new, empty ``ChatMessageHistory`` is created and stored the first time a
    session id is seen; later calls return that same object.

    Args:
        session_id: Key identifying the chat session in ``store``.

    Returns:
        The history object stored under ``session_id``.
    """
    try:
        return store[session_id]
    except KeyError:
        history = store[session_id] = ChatMessageHistory()
        return history


# Wrap rag_chain with RunnableWithMessageHistory so that chat history is looked up
# per session through get_session_history. The key arguments name the fields used
# for the user input ("input"), the history ("chat_history") and the output ("answer").
conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)

In [None]:
# First turn of session "240510101"; only the "answer" field is shown as the cell output.
conversational_rag_chain.invoke(
    {"input": "대통령의 임기는 몇년이야?"},
    config={
        "configurable": {"session_id": "240510101"}
    },  # get_session_history creates the key "240510101" in `store` on first use.
)["answer"]

In [None]:
# Follow-up question using the same session_id as the previous cell;
# only the "answer" field is shown as the cell output.
conversational_rag_chain.invoke(
    {"input": "국회의원은?"},
    config={"configurable": {"session_id": "240510101"}},
)["answer"]


### **Open Source LLM으로 RAG 시스템 구축하기**

**책에 명시된 Ollama 세팅 및 EEVE 모델 다운로드가 완료되어야 실행 가능한 셀입니다.**

In [None]:
import torch

# Report the installed PyTorch version.
print(f"PyTorch 버전: {torch.__version__}")

# Select the compute device once: CUDA when available, otherwise CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Report the CUDA version and GPU name when CUDA was selected; otherwise
# report that CUDA is unavailable and that the CPU is used instead.
if device.type == "cuda":
    print(f"CUDA 버전: {torch.version.cuda}")
    print(f"CUDA를 사용할 수 있습니다. 사용 가능한 GPU: {torch.cuda.get_device_name(0)}")
else:
    print("CUDA를 사용할 수 없습니다.")
    print("CUDA를 사용할 수 없습니다. CPU를 사용합니다.")

print(f"현재 사용 중인 디바이스: {device}")

In [None]:
from langchain_ollama import ChatOllama
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate

# Rebind `llm` to the EEVE Korean model served through Ollama. Cells below that
# reference `llm` use this model from here on.
llm = ChatOllama(model="EEVE-Korean-10.8B:latest")
# Simple prompt asking for a short joke about the given topic.
prompt = ChatPromptTemplate.from_template("{topic}에 대한 짧은 농담을 들려주세요. ")

# Prompt -> model -> plain-string output.
chain = prompt | llm | StrOutputParser()

# Smoke test: request a joke about space travel and print the result.
print(chain.invoke({"topic": "우주여행"}))

**모든 요소를 Open Source로 RAG 체인 구축하기**

In [None]:
# Delete the collection of a default-constructed Chroma store.
# NOTE(review): presumably this clears the vectors written by the earlier
# OpenAI-embedding cells before re-indexing with a different embedding model — confirm.
Chroma().delete_collection()

In [None]:
from langchain.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain_ollama import ChatOllama
from langchain import hub
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

# Load the Constitution PDF.
loader = PyPDFLoader(file_path=r"../data/대한민국헌법(헌법)(제00010호)(19880225).pdf")
pages = loader.load_and_split()

# Split into chunks of size 500 with no overlap.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=0,
    chunk_size=500,
)
docs = text_splitter.split_documents(documents=pages)

import torch  # local import so this cell also runs without the earlier device-check cell

# Korean sentence-embedding model from the Hugging Face Hub.
model_name = "jhgan/ko-sbert-nli"
# Torch device type strings are lowercase ("cuda", "cpu"); "CUDA" is rejected as
# an invalid device string. Fall back to the CPU when no GPU is available, in line
# with the device check performed earlier in the notebook.
model_kwargs = {'device': 'cuda' if torch.cuda.is_available() else 'cpu'}
# Normalize the produced embedding vectors.
encode_kwargs = {'normalize_embeddings': True}

# Embedding wrapper used to index the document chunks.
embedding = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)

# Index the chunks in Chroma with the Hugging Face embedding model and create a retriever.
vectorstore = Chroma.from_documents(docs, embedding)
retriever = vectorstore.as_retriever()

# Pull the "rlm/rag-prompt" RAG prompt from LangChain Hub.
prompt = hub.pull("rlm/rag-prompt")

def format_docs(docs):
    """Merge retrieved documents into a single context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The page contents, in input order, separated by one blank line.
    """
    contents = [doc.page_content for doc in docs]
    return "\n\n".join(contents)

# RAG chain: the retrieved documents are joined into one string by format_docs and
# passed as "context"; the raw question is passed through as "question"; the filled
# prompt goes to `llm` and the output is parsed to a plain string.
rag_chain = (
    {"context": retriever|format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

**[rag_chain 답변 스트리밍하기]**

In [None]:
# Stream the answer: print each chunk as it arrives, without newlines, flushing
# so the text appears immediately.
for chunk in rag_chain.stream("헌법 제 1조 1항이 뭐야"):
    print(chunk, end="", flush=True)