In [117]:
from langchain_community.chat_models import ChatOllama
# Chat model shared by every chain in this notebook: the `llama3` model with temperature 0.
llm = ChatOllama(
    model='llama3',
    temperature=0,
)

In [118]:
# pip install langchain_chroma

In [119]:
import os

# Set the API key
# os.environ['OPENAI_API_KEY'] = ""

In [120]:
# pip install langchain_text_splitters

In [121]:
# pip install langchain_openai

## Todo
0. vector store에 데이터 저장 [v]
1. 유저로부터 Query 받기 [v]
2. 쿼리로 retrieval 얻기 [v]
3. 관련성 체크 (cosine similarity가 N이상인지?) [v]
4. 통과 못한 경우 context에 없다고 답변 [v]
5. 통과한 경우 RAG + query를 넣어서 답변 [v]
6. 할루시네이션 체크 (근거가 있는 사실인지 물어보기)
7. 할루시네이션이 아닌 경우 바로 답변
8. 할루시네이션인 경우 다시 답변 생성 (retry 5 times)

## Split and Load

In [122]:
# Build a sample vectorDB
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Pages that make up the sample corpus.
urls = [
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
    "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",
]

# The splitter settings never change, so one instance serves every page.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)

# Step 1: load every page and collect its chunks.
all_splits = []

for url in urls:
    loader = WebBaseLoader(url)
    data = loader.load()

    splits = text_splitter.split_documents(data)
    all_splits.extend(splits)

# Step 2: VectorDB. Index exactly once, after the loop. Building the store
# inside the loop passed the growing `all_splits` list on every iteration, so
# chunks from earlier pages were embedded and stored again each time.
# NOTE(review): with a persist_directory, re-running this cell presumably adds
# the same chunks to the existing on-disk collection again - confirm, and clear
# ./chroma_db (or load the persisted store instead) before re-running.
embedding = OpenAIEmbeddings()
vectordb = Chroma.from_documents(
    documents=all_splits,
    embedding=embedding,
    persist_directory="./chroma_db",
)

## Query

In [123]:
from langchain.retrievers.multi_query import MultiQueryRetriever

# The query that drives the rest of the notebook.
question = "agent memory"

# Build the multi-query retriever from the vector store's retriever and the shared LLM.
base_retriever = vectordb.as_retriever()
retriever_from_llm = MultiQueryRetriever.from_llm(
    llm=llm,
    retriever=base_retriever,
)

# Retrieve the candidate documents and show how many came back.
rags = retriever_from_llm.invoke(question)
len(rags)

6

## Relevance Check

In [124]:
import math
from langchain_openai import OpenAIEmbeddings
from langchain_core.runnables import RunnablePassthrough

def cosine_similarity(vec1, vec2):
    """Return the cosine of the angle between two numeric vectors.

    Args:
        vec1: First vector, as a sequence of numbers.
        vec2: Second vector, as a sequence of numbers.

    Returns:
        The dot product divided by the product of the two Euclidean norms,
        or 0.0 when either vector has zero length (avoids dividing by zero).
    """
    # Numerator: element-wise products, paired position by position.
    numerator = sum(x * y for x, y in zip(vec1, vec2))

    # Denominator parts: the Euclidean norm of each vector.
    norm_first = math.sqrt(sum(x * x for x in vec1))
    norm_second = math.sqrt(sum(y * y for y in vec2))

    # A zero-length vector has no direction; report "no similarity".
    if not (norm_first and norm_second):
        return 0.0

    return numerator / (norm_first * norm_second)

def prompt_router(query, rags, threshold=0.8, verbose=True):
    """Keep only the retrieved documents that are similar enough to the query.

    The query and the text of every document are embedded with
    ``OpenAIEmbeddings`` and compared with ``cosine_similarity``.

    Args:
        query: The user question.
        rags: Retrieved documents; each must expose a ``page_content`` attribute.
        threshold: A document is kept only when its similarity to the query is
            strictly greater than this value. Defaults to 0.8.
        verbose: When true (the default), print each similarity score.

    Returns:
        The documents from ``rags`` that passed the check, in their original
        order. Empty when nothing passed or when ``rags`` is empty.
    """
    # Nothing was retrieved, so nothing can pass; skip the embedding calls.
    if not rags:
        return []

    embeddings = OpenAIEmbeddings()

    query_embedding = embeddings.embed_query(query)

    rag_texts = [doc.page_content for doc in rags]  # Using the 'page_content' attribute
    rag_embeddings = embeddings.embed_documents(rag_texts)

    # Keep the documents whose similarity is above the threshold.
    similar_documents = []
    for doc, rag_embedding in zip(rags, rag_embeddings):
        sim = cosine_similarity(query_embedding, rag_embedding)
        if verbose:
            print(sim)
        if sim > threshold:
            similar_documents.append(doc)

    return similar_documents

# Run the relevance check on the retrieved documents and show what survived.
similar_documents = prompt_router(question, rags)
print(similar_documents)
# TODO: how should this result be passed to the LLM so that it answers that there is no matching context?

0.7727910001943669
0.7661705053070712
0.7979500562752266
0.7510829761761391
0.7522405097294081
0.7299772381868517
[]


## RAG + query로 질문

In [125]:
from langchain.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from operator import itemgetter

def documents_to_dicts(documents):
    """Convert documents into plain dictionaries.

    Args:
        documents: Iterable of objects exposing ``page_content`` and a
            ``metadata`` mapping.

    Returns:
        A list with one dict per document, holding its ``page_content`` and a
        ``metadata`` dict restricted to the description, language, source and
        title entries. A missing entry becomes an empty string.
    """
    metadata_keys = ("description", "language", "source", "title")
    return [
        {
            "page_content": document.page_content,
            "metadata": {
                key: document.metadata.get(key, '') for key in metadata_keys
            },
        }
        for document in documents
    ]

# Prompt written with Llama 3 style header tokens: the system turn carries the
# retrieved context and the user turn carries the question.
template = """
<|begin_of_text|><|start_header_id|>system<|end_header_id|>
    
You are assistant who answers about given context.
Answer "no context" if there isn't any given context.
'''
Context
{context}
'''
#Answer [list of numbers]
<|eot_id|>
<|start_header_id|>user<|end_header_id|>

{question}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""

# Join the text of the documents that passed the relevance check. This is an
# empty string when none passed.
context_text = "\n\n\n".join(doc.page_content for doc in similar_documents)


prompt = ChatPromptTemplate.from_template(template)

# Route the two input fields into the prompt, then send the prompt to the LLM.
rag_chain = (
    {"context": itemgetter("context"),
    "question": itemgetter("question")}
    | prompt
    | llm
)

# Pass the joined text, not the list of Document objects. Formatting the list
# into the prompt inserted its repr ("[]" when empty) instead of the passage
# text, and left `context_text` unused.
output = rag_chain.invoke({"context": context_text, "question": question})

print(output)

# rag_chain.invoke("What is Task Decomposition?")


# # LLM
# llm = ChatOpenAI(temperature=0)

# # Chain
# generate_queries_decomposition = ( prompt_decomposition | llm | StrOutputParser() | (lambda x: x.split("\n")))

# # Run
# question = "What are the main components of an LLM-powered autonomous agent system?"
# questions = generate_queries_decomposition.invoke({"question":question})



content='No context.' response_metadata={'model': 'llama3', 'created_at': '2024-06-18T08:07:56.329883Z', 'message': {'role': 'assistant', 'content': ''}, 'done_reason': 'stop', 'done': True, 'total_duration': 354902708, 'load_duration': 3375875, 'prompt_eval_count': 58, 'prompt_eval_duration': 292145000, 'eval_count': 4, 'eval_duration': 57440000} id='run-f45bfd4c-0dee-4fa9-bf6e-29cedf55eb68-0'


## Hallucination Checker