In [1]:
# Chat model shared by the chains below: the `llama3` model accessed through
# ChatOllama, with the sampling temperature fixed at 0.
from langchain_community.chat_models import ChatOllama
llm = ChatOllama(model='llama3', temperature=0)



In [2]:
# pip install langchain_chroma

In [3]:
import getpass
import os

# Take the OpenAI API key from the environment and prompt for it only when it
# is missing. Assigning a literal here would overwrite a valid key that is
# already exported and tempts people to commit a real secret with the notebook.
if not os.environ.get('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = getpass.getpass("OpenAI API key: ")

In [4]:
# pip install langchain_text_splitters

In [5]:
# pip install langchain_openai

## Todo
0. vector store에 데이터 저장 [v]
1. 유저로부터 Query 받기 [v]
2. 쿼리로 retrieval 얻기 [v]
3. 관련성 체크 (cosine similarity가 N이상인지?) [v]
4. 통과 못한 경우 context에 없다고 답변 [v]
5. 통과한 경우 RAG + query를 넣어서 답변 [v]
6. 할루시네이션 체크 (근거가 있는 사실인지 물어보기) [v]
7. 할루시네이션이 아닌 경우 바로 답변 [v]
8. 할루시네이션인 경우 다시 답변 생성 (retry 5 times)

## Split and Load

In [6]:
# Build a sample vectorDB
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

urls = [
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
    "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",
]

# The splitter has no per-URL state, so a single instance serves every page.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)

# Load each page and collect its chunks into one list.
all_splits = []
for url in urls:
    loader = WebBaseLoader(url)
    data = loader.load()
    splits = text_splitter.split_documents(data)
    all_splits.extend(splits)

# VectorDB
# Built exactly once, after every page has been split. Building it inside the
# loop embedded and inserted the cumulative `all_splits` on every iteration,
# which stored the earlier pages' chunks multiple times.
# NOTE(review): because persist_directory is set, re-running this cell
# presumably adds the same chunks to ./chroma_db again -- confirm, and clear
# the directory before a re-run if so.
embedding = OpenAIEmbeddings()
vectordb = Chroma.from_documents(persist_directory="./chroma_db", documents=all_splits, embedding=embedding)

## Query

In [7]:
from langchain.retrievers.multi_query import MultiQueryRetriever

# The user query that drives the rest of the notebook.
question = "agent memory"

# Wrap the vector store's retriever in a MultiQueryRetriever that is handed
# the local chat model, then fetch the candidate documents for the question.
retriever_from_llm = MultiQueryRetriever.from_llm(
    llm=llm,
    retriever=vectordb.as_retriever(),
)
rags = retriever_from_llm.invoke(question)

## Relevance Check - Try 1 cosine similarity

In [8]:
import math
from langchain_openai import OpenAIEmbeddings
from langchain_core.runnables import RunnablePassthrough

def cosine_similarity(vec1, vec2):
    """Return the cosine similarity of two numeric vectors.

    The vectors are paired element by element with ``zip`` to form the dot
    product, which is then divided by the product of the two Euclidean norms.

    Args:
        vec1: First sequence of numbers.
        vec2: Second sequence of numbers.

    Returns:
        The cosine similarity as a float, or ``0.0`` when either vector has
        a norm of zero (so no division by zero can occur).
    """
    inner = sum(x * y for x, y in zip(vec1, vec2))
    norm1 = math.sqrt(sum(x * x for x in vec1))
    norm2 = math.sqrt(sum(y * y for y in vec2))

    # Only divide when both norms are non-zero.
    if norm1 != 0 and norm2 != 0:
        return inner / (norm1 * norm2)
    return 0.0

def prompt_router(query, rags, threshold=0.8, embeddings=None):
    """Keep the retrieved documents whose embedding is close to the query's.

    Args:
        query: The user question text.
        rags: List of retrieved documents; each must expose ``page_content``.
        threshold: A document is kept only when its cosine similarity to the
            query is strictly greater than this value. Defaults to 0.8, the
            cut-off that used to be hard-coded.
        embeddings: Optional embedding client providing ``embed_query`` and
            ``embed_documents``. A new ``OpenAIEmbeddings()`` is created when
            omitted.

    Returns:
        The documents from ``rags`` that pass the threshold, in their
        original order. An empty ``rags`` yields an empty list without any
        embedding call.
    """
    # Nothing to compare: skip building a client and calling the embedding API.
    if not rags:
        return []

    if embeddings is None:
        embeddings = OpenAIEmbeddings()

    query_embedding = embeddings.embed_query(query)

    rag_texts = [doc.page_content for doc in rags]  # Using the 'page_content' attribute
    rag_embeddings = embeddings.embed_documents(rag_texts)

    # Filter documents with similarity > threshold
    similar_documents = []
    for doc, rag_embedding in zip(rags, rag_embeddings):
        sim = cosine_similarity(query_embedding, rag_embedding)
        print(sim)  # show each score so the cut-off can be tuned by eye
        if sim > threshold:
            similar_documents.append(doc)

    return similar_documents

# similar_documents = prompt_router(question, rags)
# print(similar_documents)
# 어떻게 이 결과를 llm에 넘겨서 없다고 대답하게 하지.. 
# cosine similarity로는 관련도를 제대로 판단을 못하는거 같다 ㅠ.ㅠ

## Relevance Check - Try 2 LLM as a Judge

In [9]:
from langchain.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from operator import itemgetter
import re

def extract_score_from_output(content):
    """Pull the numeric score out of a judge reply.

    Looks for the first ``#Answer:<number>`` marker in ``content`` (optional
    whitespace after the colon, optional minus sign, optional decimal part).

    Args:
        content: The text returned by the model.

    Returns:
        The score as a float, or ``None`` when no marker is present or the
        captured text cannot be converted.
    """
    found = re.search(r"#Answer:\s*(-?\d+(?:\.\d+)?)", content)
    if found is None:
        return None

    try:
        return float(found.group(1))
    except ValueError as e:
        print(f"Error converting score to float: {e}")
        return None

# Prompt for the "LLM as a judge" relevance check, written with Llama 3 chat
# special tokens. The system part asks for a score in the form '#Answer:float'
# (the marker extract_score_from_output looks for) and states that retrieval is
# not used below 0.5; the user part carries the {context} and {question} slots.
# NOTE(review): "TOTOAL" in the prompt text looks like a typo for "TOTAL".
template = """
<|begin_of_text|><|start_header_id|>system<|end_header_id|>
    
You are developing a retrieval(context) evaluator system. 
Define the criteria that determine if a retrieved document is relevant to a user's question. 
Your goal is to create a set of guidelines that the system will follow to assess relevance accurately. 
First, consider the key elements that indicate relevance between a user's question and a retrieved document. 
Think about how the system can analyze the content to make this determination effectively.
Consider the fact that retrieval will not be used when the score is below 0.5.
Please ANSWER TOTOAL SCORE IN FORMAT '#Answer:float'.
<|eot_id|>
<|start_header_id|>user<|end_header_id|>
Context : {context}
User Question : {question}<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>
"""

# Turn the raw text into a prompt template with the two input variables.
prompt = ChatPromptTemplate.from_template(template)

# Relevance-judge chain: pick "context" and "question" out of the input dict,
# render the prompt with them, and send the result to the chat model.
rag_chain = (
    {"context": itemgetter("context"),
    "question": itemgetter("question")}
    | prompt
    | llm
)

# Ask the judge chain to score every retrieved document against the question
# and keep the ones scoring above 0.5 (replies without a parsable score are
# dropped).
filtered_rags = []
for rag in rags:
    output = rag_chain.invoke({"context": rag, "question": question})
    score = extract_score_from_output(output.content)
    if score is not None and score > 0.5:
        # append, not extend: extend() would iterate over the single document
        # and add its pieces to the list instead of the document itself.
        filtered_rags.append(rag)

## RAG + query로 질문

In [10]:
# Answering prompt (Llama 3 chat special tokens): the system part carries the
# retrieved {context} and tells the model to reply "no context" when none is
# given; the user part is the {question}.
# NOTE(review): the context block opens with ''' but closes with '' -- the
# delimiters do not match; confirm whether that is intended.
template = """
<|begin_of_text|><|start_header_id|>system<|end_header_id|>
    
You are assistant who answers about given context.
Answer "no context" if there isn't any given context.
'''
Context
{context}
''
<|eot_id|>
<|start_header_id|>user<|end_header_id|>

{question}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""

# Rebinds `prompt` and `rag_chain`: from here on they refer to the answering
# prompt/chain, not the relevance judge defined earlier.
prompt = ChatPromptTemplate.from_template(template)

# Answering chain: pick "context" and "question" out of the input dict, render
# the prompt, and send it to the chat model.
rag_chain = (
    {"context": itemgetter("context"),
    "question": itemgetter("question")}
    | prompt
    | llm
)

# The filtered list is passed as the context value as-is.
# NOTE(review): presumably it is rendered into the prompt via its string
# representation -- confirm that this is the desired context format.
output = rag_chain.invoke({"context": filtered_rags, "question": question})


## Hallucination Checker

In [11]:
def extract_boolean_from_output(content):
    """Read the true/false verdict out of a judge reply.

    Looks for the first ``#Answer:True`` / ``#Answer:False`` marker in
    ``content``; matching is case-insensitive and whitespace after the colon
    is allowed.

    Args:
        content: The text returned by the model.

    Returns:
        ``True`` or ``False`` according to the first marker found, or
        ``None`` when the text contains no such marker.
    """
    verdict = re.search(r"#Answer:\s*(True|False)", content, flags=re.IGNORECASE)
    if verdict is None:
        return None
    # Normalise the captured word so 'TRUE', 'True' and 'true' all compare equal.
    return verdict.group(1).lower() == 'true'

# Prompt for the hallucination check (Llama 3 chat special tokens). The system
# part asks the model to reply "#Answer:true" when the answer is hallucinated
# and "#Answer:false" otherwise -- the marker extract_boolean_from_output
# parses. The user part carries only {answer} and {question}; the retrieved
# context is not part of this prompt.
template = """
<|begin_of_text|><|start_header_id|>system<|end_header_id|>
    
You are a hallucination checker AI. Your task is to assess whether a given answer generated by the LLM model corresponds to a user's question and determine if it exhibits signs of hallucination. 
If the answer is deemed hallucinatory, provide guidance on how to identify and correct such responses. If the answer is coherent, acknowledge its clarity and relevance to the question.

Take a deep breath and let's take this step by step.

Analyze the user's question carefully.
Evaluate the LLM-generated answer in relation to the question.
Determine if the answer shows signs of hallucination or if it directly addresses the question.
If hallucination is detected, provide feedback on how to improve the response.
If the answer is coherent, acknowledge its relevance and clarity.
Please answer "#Answer:true" if it is hallucinated otherwise "#Answer:false"
<|eot_id|>
<|start_header_id|>user<|end_header_id|>
LLM-generated answer
{answer}
''
User question
{question}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""

# Rebinds `prompt` and `rag_chain` once more: from here on they refer to the
# hallucination-check prompt/chain.
prompt = ChatPromptTemplate.from_template(template)

# Hallucination-check chain: pick "answer" and "question" out of the input
# dict, render the prompt, and send it to the chat model.
rag_chain = (
    {"answer": itemgetter("answer"),
    "question": itemgetter("question")}
    | prompt
    | llm
)

# Ask the judge chain whether the generated answer is hallucinated.
final_output = rag_chain.invoke({"answer": output.content, "question": question})

# extract_boolean_from_output returns True, False, or None (no verdict found).
# None must not be treated as "not hallucinated": an unparsable judge reply
# would otherwise let the answer through as if it had been verified.
is_hallucinated = extract_boolean_from_output(final_output.content)
if is_hallucinated is None:
    print("Hallucination check returned no parsable '#Answer:true/false' verdict; answer withheld.")
elif not is_hallucinated:
    print(output.content)

Based on the given context, it seems that you are referring to the concept of agent memory in the context of LLM-powered autonomous agents.

In this context, agent memory refers to the ability of an agent to retain and recall information over extended periods. This is different from short-term memory, which is used for learning and processing information in real-time.

According to the text, long-term memory provides the agent with the capability to retain and recall (infinite) information over extended periods, often by leveraging an external vector store and fast retrieval.

In addition, the text mentions that LLM-powered autonomous agents combine LLM with memory, planning, and reflection mechanisms to enable agents to behave conditioned on past experience, as well as interact with other agents. This suggests that agent memory plays a crucial role in enabling these agents to learn from their experiences and adapt to new situations.

Overall, agent memory is an important aspect of LLM