In [7]:
import os
import openai
import dotenv

from tqdm import tqdm
from datasets import load_dataset

from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.vectorstores import FAISS

from langchain import hub
from langchain.document_loaders import WebBaseLoader
from langchain.prompts import PromptTemplate, ChatPromptTemplate
from langchain.chains import LLMChain
from langchain.chains.router import MultiPromptChain
from langchain.chains.router.llm_router import LLMRouterChain

from langchain_core.output_parsers import StrOutputParser, JsonOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.runnables import RunnablePassthrough

dotenv.load_dotenv()

True

In [2]:
llm = ChatOpenAI(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o-mini")
embedding = OpenAIEmbeddings(api_key=os.getenv("OPENAI_API_KEY"), model="text-embedding-3-small")
loader = WebBaseLoader([
    "https://applied-llms.org",
])
documents = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
docs = text_splitter.split_documents(documents)

Created a chunk of size 1941, which is longer than the specified 1000
Created a chunk of size 1120, which is longer than the specified 1000
Created a chunk of size 1912, which is longer than the specified 1000
Created a chunk of size 1523, which is longer than the specified 1000
Created a chunk of size 1129, which is longer than the specified 1000
Created a chunk of size 2242, which is longer than the specified 1000
Created a chunk of size 1785, which is longer than the specified 1000
Created a chunk of size 1053, which is longer than the specified 1000
Created a chunk of size 1454, which is longer than the specified 1000
Created a chunk of size 1656, which is longer than the specified 1000
Created a chunk of size 2243, which is longer than the specified 1000
Created a chunk of size 1086, which is longer than the specified 1000
Created a chunk of size 2412, which is longer than the specified 1000
Created a chunk of size 1730, which is longer than the specified 1000
Created a chunk of s

In [3]:
store = FAISS.from_documents(docs, embedding)
retriever = store.as_retriever()

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

input_chain = {"context": retriever | format_docs, "input": RunnablePassthrough()}

In [11]:
# 라우팅을 위한 관계 정보를 내보내는 절차
class RoutingResult(BaseModel):
    input: str = Field(description="The input that was given to the model")
    relevance: str = Field(description="The relevance of the answer to the context it must be 'yes' or 'no")
    context: str = Field(description="The context in which the answer is relevant")

routing_parser = JsonOutputParser(pydantic_object=RoutingResult)
routing_prompt = PromptTemplate(
    template="""
Uf user input is related to context, answer relevance 'yes', but if not, answer relevance 'no'.

{context}

{input}

{format_instructions}
""",
    input_variables=["input"],
    partial_variables={"format_instructions": routing_parser.get_format_instructions()},
)

router_chain = routing_prompt | llm | routing_parser

test_chain = input_chain | router_chain

print(f'RAG 에 대해 질문 : {test_chain.invoke("Explain me what is RAG")}')
print(f'엘든링에 대해 질문 : {test_chain.invoke("Explain me what is Elden Ring")}')

RAG 에 대해 질문 : {'relevance': 'yes'}
엘든링에 대해 질문 : {'relevance': 'no'}
