In [396]:
%%capture --no-stderr
%pip install langchain-nomic langchain_community tiktoken langchainhub chromadb langchain langgraph tavily-python langchain-text-splitters gpt4all arxiv
%pip install -U langchain langchain-openai

In [452]:
from tavily import TavilyClient
tavily = TavilyClient(api_key='')

response = tavily.search(query="Where does Messi play right now?", max_results=3)
context = [{"url": obj["url"], "content": obj["content"]} for obj in response['results']]

# You can easily get search result context based on any max tokens straight into your RAG.
# The response is a string of the context within the max_token limit.

response_context = tavily.get_search_context(query="Where does Messi play right now?", search_depth="advanced", max_tokens=500)

# You can also get a simple answer to a question including relevant sources all with a simple function call:
# You can use it for baseline
response_qna = tavily.qna_search(query="Where does Messi play right now?")


### 2. 그래프 스테이트 및 노드, 엣지 아래처럼 변경

In [463]:
import os


os.environ['OPENAI_API_KEY'] = ''
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_ENDPOINT'] = "https://api.smith.langchain.com"
os.environ['LANGCHAIN_API_KEY'] = ""
os.environ['LANGCHAIN_PROJECT'] = ""


In [464]:
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o-mini")

In [465]:
from langsmith.wrappers import wrap_openai
from langsmith import traceable

@traceable # Auto-trace this function
def pipeline(user_input: str):
    return llm.invoke(user_input)


In [466]:
# index

import bs4
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_openai import OpenAIEmbeddings

urls = [
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
    "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",
]

# Load, chunk and index the contents of the blog.
loader = WebBaseLoader(
    web_paths=urls,
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)

text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
texts = text_splitter.split_documents(loader.load())
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
vectorstore = FAISS.from_documents(texts, embeddings)

vector_store_retrievers = [vectorstore.as_retriever(), vectorstore.as_retriever(
    search_type="mmr",
    search_kwargs={'k': 6, 'lambda_mult': 0.25}
), vectorstore.as_retriever(
    search_type="mmr",
    search_kwargs={'k': 5, 'fetch_k': 50}
), vectorstore.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={'score_threshold': 0.8}
), vectorstore.as_retriever(search_kwargs={'k': 1}), vectorstore.as_retriever(
    search_kwargs={'filter': {'paper_title': 'GPT-4 Technical Report'}}
)]

Created a chunk of size 2731, which is longer than the specified 1000
Created a chunk of size 1538, which is longer than the specified 1000
Created a chunk of size 1380, which is longer than the specified 1000
Created a chunk of size 2352, which is longer than the specified 1000
Created a chunk of size 1953, which is longer than the specified 1000
Created a chunk of size 1067, which is longer than the specified 1000
Created a chunk of size 1475, which is longer than the specified 1000
Created a chunk of size 2881, which is longer than the specified 1000
Created a chunk of size 1980, which is longer than the specified 1000
Created a chunk of size 4145, which is longer than the specified 1000
Created a chunk of size 2159, which is longer than the specified 1000
Created a chunk of size 1317, which is longer than the specified 1000
Created a chunk of size 1112, which is longer than the specified 1000
Created a chunk of size 1043, which is longer than the specified 1000
Created a chunk of s

In [467]:
# Relevance Checker
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate

parser = JsonOutputParser()

relevant_prompt = PromptTemplate(
    template="""
    Determine if the question is relevant to context.
    question: {question}
    context: {context}
    
    A answer value is boolean, and key is 'relevant'

    {format_instructions}
    {question}
    {context}
    """,
    input_variables=["question", "context"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

relevant_chain = relevant_prompt | llm | parser


In [468]:
# Generation
generate_prompt = PromptTemplate(
    template="""
    You are an assistant for question-answering tasks.
    Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know.
    Use three sentences maximum and keep the answer concise 
    
    {format_instructions}
    {question}
    {context}
    """,
    input_variables=["question", "context"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

# Chain
generate_chain = generate_prompt | llm | parser

In [469]:
# Hallucination Checker

hallucination_checker_prompt = PromptTemplate(
    template="""
    answer: {answer}, question: {question}, context: {context}
    
    Determine if the answer contains hallucination based on the context and the question.
        
    Your answer value is boolean, and key is 'has_hallucination'

    {format_instructions}
    """,
    input_variables=["question", "answer", "context"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

hallucination_chain = hallucination_checker_prompt | llm | parser


In [470]:
from pprint import pprint
from typing import List

from langchain_core.documents import Document
from typing_extensions import TypedDict

from langgraph.graph import END, StateGraph

### State


class State(TypedDict):
    """
    Represents the state of our graph.

    Attributes:
        question: question
        generation: LLM generation
        web_search: whether to add search
        documents: list of documents
    """

    question: str
    generation: str
    has_hallucination: bool
    web_search: str
    documents: List[str]


### Nodes
def docs_retrieval(state):
    print("---DOCS Retrieval---")
    question = state["question"]

    documents = []
    for r in vector_store_retrievers:
        documents.append(r.invoke(question))
    return {"documents": documents, "question": question}

def relevant_docs_checker(state):
    print("---CHECK RELEVANT DOCS---")
    question = state["question"]
    documents = state["documents"]
    
    relevant_documents = []
    for doc in documents:
        res = relevant_chain.invoke({"question": question, "context": doc})
        if res.get('relevant', False):
            if not relevant_checker(doc):
                relevant_documents.append(doc)

    return {
        "documents": relevant_documents, 
        "question": question
    }

def relevant_checker(possible_docs):
    irrelevant_question = 'I like an apple'

    res = relevant_chain.invoke({"question": irrelevant_question, "context": possible_docs})
    return res.get('relevant', False)

def generate_answer(state):
    print("---GENERATE ANSWER---")
    question = state["question"]
    relevant_documents = state["documents"]

    # RAG generation
    res = generate_chain.invoke({"context": relevant_documents[0], "question": question})
    possible_answer = res.get('answer', 'I like an apple')

    return {
        "documents": relevant_documents, 
        "question": question, 
        "generation": possible_answer
    }

def hallucination_checker(state):
    print("---CHECK HALLUCINATION---")
    question = state["question"]
    relevant_documents = state["documents"]
    possible_answer = state["generation"]
    
    has_hallucination = False
    for doc in relevant_documents:
        res = hallucination_chain.invoke({"answer": possible_answer, "question": question, "context": doc})
        has_hallucination |= res.get('has_hallucination', True)
    
    return {
        "documents": relevant_documents,
        "question": question,
        "generation": possible_answer,
        "has_hallucination": has_hallucination
    }

def web_searcher(state):
    print("---SEARCH WEB---")
    question = state["question"]
    responses = tavily.get_search_context(query="Where does Messi play right now?", search_depth="advanced", max_tokens=500, max_results=3)
    relevant_documents = [res for res in responses]
    
    return {
        "relevant_documents": relevant_documents,
        "question": question
    }

### Edges
def decide_to_generate(state):
    print("---Let System generate or web search---")
    relevant_documents = state["documents"]

    if len(relevant_documents) == 0:
        return "web_searcher"
    else:
        print("---DECISION: GENERATE---")
        return "generate_answer"

def decide_to_answer(state):
    print("---Let System print the answer or not---")
    has_hallucination = state["has_hallucination"]
    if has_hallucination:
        print("---DECISION: Re-Generate---")
        return "web_searcher"
    else:
        print("---DECISION: Print The Answer---")
        return "useful"

state_machine = StateGraph(State)

# Define the nodes
state_machine.add_node("docs_retrieval", docs_retrieval)
state_machine.add_node("relevant_docs_checker", relevant_docs_checker)
state_machine.add_node("generate_answer", generate_answer)
state_machine.add_node("hallucination_checker", hallucination_checker)
state_machine.add_node("web_searcher", web_searcher)

In [471]:
# Build graph
state_machine.set_entry_point("docs_retrieval")
state_machine.add_edge("docs_retrieval", "relevant_docs_checker")
state_machine.add_conditional_edges(
    "relevant_docs_checker",
    decide_to_generate,
    {
        "web_searcher": "web_searcher",
        "generate_answer": "generate_answer",
    },
)
state_machine.add_edge("web_searcher", "relevant_docs_checker")
state_machine.add_edge("generate_answer", "hallucination_checker")
state_machine.add_conditional_edges(
    "hallucination_checker",
    decide_to_answer,
    {
        "web_searcher": "web_searcher",
        "useful": END,
    },
)

In [476]:

# Compile
app = state_machine.compile()

# Test

inputs = {"question": "What are the types of agent memory?"}
for output in app.stream(inputs):
    for key, value in output.items():
        pprint(f"Finished running: {key}:")
        
print("---DONE---")

---DOCS Retrieval---




'Finished running: docs_retrieval:'
---CHECK RELEVANT DOCS---
---Let System generate or web search---
---DECISION: GENERATE---
'Finished running: relevant_docs_checker:'
---GENERATE ANSWER---
'Finished running: generate_answer:'
---CHECK HALLUCINATION---
---Let System print the answer or not---
---DECISION: Print The Answer---
'Finished running: hallucination_checker:'
---DONE---


In [475]:
value['generation']

'The types of agent memory include sensory memory, short-term memory, and long-term memory. Sensory memory captures raw inputs briefly, short-term memory involves in-context learning, and long-term memory allows for the retention and retrieval of information over extended periods. Additionally, long-term memory can utilize external vector stores for efficient access.'