In [10]:
import os
import openai
import dotenv

from tqdm import tqdm
from datasets import load_dataset

from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.vectorstores import FAISS

from langchain import hub
from langchain.document_loaders import WebBaseLoader
from langchain.prompts import PromptTemplate, ChatPromptTemplate
from langchain.chains import LLMChain, SequentialChain
from langchain.chains.router import MultiPromptChain
from langchain.chains.router.llm_router import LLMRouterChain

from langchain_core.output_parsers import StrOutputParser, JsonOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.runnables import RunnablePassthrough

dotenv.load_dotenv()

True

In [11]:
llm = ChatOpenAI(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o-mini")
embedding = OpenAIEmbeddings(api_key=os.getenv("OPENAI_API_KEY"), model="text-embedding-3-small")
loader = WebBaseLoader([
    "https://applied-llms.org",
])
documents = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
docs = text_splitter.split_documents(documents)

Created a chunk of size 1941, which is longer than the specified 1000
Created a chunk of size 1120, which is longer than the specified 1000
Created a chunk of size 1912, which is longer than the specified 1000
Created a chunk of size 1523, which is longer than the specified 1000
Created a chunk of size 1129, which is longer than the specified 1000
Created a chunk of size 2242, which is longer than the specified 1000
Created a chunk of size 1785, which is longer than the specified 1000
Created a chunk of size 1053, which is longer than the specified 1000
Created a chunk of size 1454, which is longer than the specified 1000
Created a chunk of size 1656, which is longer than the specified 1000
Created a chunk of size 2243, which is longer than the specified 1000
Created a chunk of size 1086, which is longer than the specified 1000
Created a chunk of size 2412, which is longer than the specified 1000
Created a chunk of size 1730, which is longer than the specified 1000
Created a chunk of s

In [3]:
store = FAISS.from_documents(docs, embedding)
retriever = store.as_retriever()

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

input_chain = {"context": retriever | format_docs, "input": RunnablePassthrough()}

In [12]:
# 라우팅을 위한 관계 정보를 내보내는 절차
class RoutingResult(BaseModel):
    input: str = Field(description="Original user 'input' field")
    relevance: str = Field(description="The relevance of the answer to the context it must be 'yes' or 'no")
    context: str = Field(description="Original user 'context' field ")

routing_parser = JsonOutputParser(pydantic_object=RoutingResult)
routing_prompt = PromptTemplate(
    template="""
If user input is related to context, answer relevance 'yes', but if not, answer relevance 'no'.
If relevance is 'no', context should be empty.
context: {context}

input: {input}

{format_instructions}
""",
    input_variables=["input"],
    partial_variables={"format_instructions": routing_parser.get_format_instructions()},
)

router_chain = routing_prompt | llm | routing_parser

test_chain = input_chain | router_chain

print(f'RAG 에 대해 질문 : {test_chain.invoke("Explain me what is RAG")}')
print(f'엘든링에 대해 질문 : {test_chain.invoke("Explain me what is Elden Ring")}')

RAG 에 대해 질문 : {'input': 'Explain me what is RAG', 'relevance': 'yes', 'context': '1.2 Information Retrieval / RAG\nBeyond prompting, another effective way to steer an LLM is by providing knowledge as part of the prompt. This grounds the LLM on the provided context which is then used for in-context learning. This is known as retrieval-augmented generation (RAG). Practitioners have found RAG effective at providing knowledge and improving output, while requiring far less effort and cost compared to finetuning.'}
엘든링에 대해 질문 : {'input': 'Explain me what is Elden Ring', 'relevance': 'no', 'context': ''}


In [13]:
# 응답을 추가해주는 절차
class GeneratingResult(BaseModel):
    input: str = Field(description="Original user 'input' field")
    relevance: str = Field(description="Original user 'relevance' field")
    answer: str = Field(description="Using 'input' and 'relevance' fields, generate an answer, only if relevance is 'yes'")
    context: str = Field(description="Original user 'context' field ")

generating_parser = JsonOutputParser(pydantic_object=GeneratingResult)
generating_prompt = PromptTemplate(
    template="""
Answer the user question only if relevance is 'yes'.
When you answer must using context.
If relevance is 'no', you can leave the answer field empty.

relevance: {relevance}

context: {context}

input: {input}

{format_instructions}
""",
    input_variables=["input"],
    partial_variables={"format_instructions": generating_parser.get_format_instructions()},
)

generating_chain = generating_prompt | llm | generating_parser

test_chain = input_chain | router_chain | generating_chain

print(f'RAG 에 대해 질문 : {test_chain.invoke("Explain me what is RAG")}')
print(f'엘든링에 대해 질문 : {test_chain.invoke("Explain me what is Elden Ring")}')

RAG 에 대해 질문 : {'input': 'Explain me what is RAG', 'relevance': 'yes', 'answer': 'RAG, or retrieval-augmented generation, is a method that enhances the capabilities of language models by providing them with knowledge as part of their prompts. This technique grounds the language model on the given context, allowing it to perform in-context learning. RAG has been found to be effective in delivering knowledge and improving output while being less resource-intensive compared to traditional finetuning methods.', 'context': '1.2 Information Retrieval / RAG\nBeyond prompting, another effective way to steer an LLM is by providing knowledge as part of the prompt. This grounds the LLM on the provided context which is then used for in-context learning. This is known as retrieval-augmented generation (RAG). Practitioners have found RAG effective at providing knowledge and improving output, while requiring far less effort and cost compared to finetuning.'}
엘든링에 대해 질문 : {'input': 'Explain me what is 

In [16]:
# 응답을 추가해주는 절차
class HallucinationResult(BaseModel):
    input: str = Field(description="Original user 'input' field")
    relevance: str = Field(description="Original user 'relevance' field")
    hallucination: str = Field(description="The hallucination of the answer it must be 'yes' or 'no'")
    answer: str = Field(description="Using 'input' and 'relevance' fields, generate an answer, only if relevance is 'yes' if relevance is 'no' answer must be empty")
    context: str = Field(description="Original user 'context' field ")

hallucination_parser = JsonOutputParser(pydantic_object=HallucinationResult)
hallucination_prompt = PromptTemplate(
    template="""
Using context and input, check the answer if the answer is real or hallucination.
If answer looks like a hallucination, hallucination must be 'yes', but if not, hallucination must be 'no'.
If relevance is 'no', hallucination must be empty.

answer: {answer}

relevance: {relevance}

context: {context}

input: {input}

{format_instructions}
""",
    input_variables=["input"],
    partial_variables={"format_instructions": hallucination_parser.get_format_instructions()},
)

hallucination_chain = hallucination_prompt | llm | hallucination_parser

test_chain = input_chain | router_chain | generating_chain | hallucination_chain

print(f'RAG 에 대해 질문 : {test_chain.invoke("Explain me what is RAG")}')
print(f'엘든링에 대해 질문 : {test_chain.invoke("Explain me what is Elden Ring")}')

RAG 에 대해 질문 : {'input': 'Explain me what is RAG', 'relevance': 'yes', 'hallucination': 'no', 'answer': 'RAG, or retrieval-augmented generation, is a method that enhances the capabilities of a language model (LLM) by providing it with knowledge as part of the prompt. This approach grounds the LLM in the provided context, allowing it to engage in in-context learning. RAG has been found effective in supplying knowledge and enhancing output while requiring significantly less effort and cost compared to traditional finetuning.', 'context': '1.2 Information Retrieval / RAG\nBeyond prompting, another effective way to steer an LLM is by providing knowledge as part of the prompt. This grounds the LLM on the provided context which is then used for in-context learning. This is known as retrieval-augmented generation (RAG). Practitioners have found RAG effective at providing knowledge and improving output, while requiring far less effort and cost compared to finetuning.'}
엘든링에 대해 질문 : {'input': 'E

In [17]:

class RecursiveHResult(BaseModel):
    input: str = Field(description="Original user 'input' field")
    relevance: str = Field(description="Original user 'relevance' field")
    hallucination: str = Field(description="The hallucination of the answer it must be 'yes' or 'no'")
    answer: str = Field(description="Regenerate Only if hallucination is 'yes'")
    context: str = Field(description="Original user 'context' field ")

recursive_h_parser = JsonOutputParser(pydantic_object=RecursiveHResult)
recursive_h_prompt = PromptTemplate(
    template="""
Using context and input, check the answer if the answer is real or hallucination.

If relevance is 'no', hallucination must be empty. and aswer must be empty.

If relevance is 'yes', answer must be generated. 
But this time, change answer only when hallucination is 'yes'.
If relevance is 'yes' and hallucination is 'no', answer must be the same as the previous answer.
Else if relevance is 'yes' and hallucination is 'yes', answer must be regenerated.


answer: {answer}

relevance: {relevance}

hallucination: {hallucination}

context: {context}

input: {input}

{format_instructions}
""",
    input_variables=["input"],
    partial_variables={"format_instructions": recursive_h_parser.get_format_instructions()},
)

recursive_h_chain = recursive_h_prompt | llm | recursive_h_parser

test_chain = input_chain | router_chain | generating_chain | hallucination_chain | recursive_h_chain | recursive_h_chain

print(f'RAG 에 대해 질문 : {test_chain.invoke("Explain me what is RAG")}')
print(f'엘든링에 대해 질문 : {test_chain.invoke("Explain me what is Elden Ring")}')

RAG 에 대해 질문 : {'input': 'Explain me what is RAG', 'relevance': 'yes', 'hallucination': 'no', 'answer': 'RAG, or retrieval-augmented generation, is a technique that enhances the capabilities of a language model by providing it with relevant knowledge as part of the prompt. This method grounds the model on the supplied context, facilitating in-context learning and leading to more informed and accurate outputs. It is recognized for being effective at providing knowledge and improving output while requiring significantly less effort and cost compared to traditional finetuning methods.', 'context': '1.2 Information Retrieval / RAG Beyond prompting, another effective way to steer an LLM is by providing knowledge as part of the prompt. This grounds the LLM on the provided context which is then used for in-context learning. This is known as retrieval-augmented generation (RAG). Practitioners have found RAG effective at providing knowledge and improving output, while requiring far less effort a