# RAG

In [2]:
pip install -r requirements.txt

Note: you may need to restart the kernel to use updated packages.


In [3]:
from fastapi import FastAPI



In [None]:
from langchain_openai import AzureOpenAIEmbeddings

embeddings = AzureOpenAIEmbeddings(
    model="text-embedding-3-large",
    # dimensions: Optional[int] = None, # Can specify dimensions with new text-embedding-3 models
    # azure_endpoint="https://<your-endpoint>.openai.azure.com/", If not provided, will read env variable AZURE_OPENAI_ENDPOINT
    # api_key=... # Can provide an API key directly. If missing read env variable AZURE_OPENAI_API_KEY
    # openai_api_version=..., # If not provided, will read env variable AZURE_OPENAI_API_VERSION
)

In [None]:
from langchain.embeddings import GPT4AllEmbeddings
from langchain_chroma import Chroma
from langchain.vectorstores import FAISS
from langchain_openai import AzureOpenAIEmbeddings

# Crear el objeto de embeddings
# embedding_function = GPT4AllEmbeddings()


embedding_function = AzureOpenAIEmbeddings(
    model=deployment_name,  # Nombre del modelo desplegado
    api_key=openai_api_key,
    azure_endpoint=openai_api_base,
    openai_api_version=openai_api_version,
)

# # Cargar la base de datos existente
# persist_directory = "./data/chroma_langchain_db"
# vectorstore = Chroma(collection_name="products", 
#                      embedding_function=embedding_function, 
#                      persist_directory=persist_directory)



# Datos del producto
products = [
    {"id": 1, "name": "Collar de perlas", "color": "rosa", "price": 50.0, "description": "Un elegante collar de perlas."},
    {"id": 2, "name": "Anillo de diamantes", "color": "blanco", "price": 200.0, "description": "Anillo con diamantes."},
    {"id": 3, "name": "Collar de perlas", "color": "blanco", "price": 50.0, "description": "Un elegante collar de perlas."},
    {"id": 4, "name": "Anillo de diamantes", "color": "blanco", "price": 200.0, "description": "Anillo con diamantes."},
    # Añadir más productos aquí
]

# Crear las cadenas concatenadas y metadatos
texts = []
metadatas = []
for product in products:
    text = f"Producto: {product['name']}. Color: {product['color']}. Precio: {product['price']}. Descripción: {product['description']}."
    metadata = {"id": product['id'], "color": product['color'], "price": product['price']}
    texts.append(text)
    metadatas.append(metadata)

# Crear el índice FAISS con los textos y metadatos
vectorstore = FAISS.from_texts(
    texts=texts,
    embedding=embedding_function,
    metadatas=metadatas
)

# Guardar el índice FAISS en disco (opcional)
faiss_index_file = "./faiss_index"
vectorstore.save_local(faiss_index_file)

# Cargar el índice FAISS desde disco (si lo guardaste previamente)
loaded_vectorstore = FAISS.load_local(
    folder_path=faiss_index_file,
    embeddings=embedding_function,
    allow_dangerous_deserialization=True
)
# Añadir los textos y metadatos a la base de datos
# vectorstore.add_texts(texts, metadatas)


In [25]:
# Consultar el índice
query = "Color blanco"
results = loaded_vectorstore.similarity_search(query, k=10)

# Mostrar los resultados
for result in results:
    print(result.page_content, result.metadata)

VectorStoreRetriever(tags=['FAISS', 'AzureOpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000002297CB75BD0>, search_kwargs={})

In [32]:
vectorstore.index.ntotal

4

In [31]:
loaded_vectorstore.vectorstore.index.ntotal

ValueError: could not convert string to float: 'Color blanco'

In [18]:
query = "Productos"
results = loaded_vectorstore.similarity_search(query, k=20, filter={"color": "blanco"})

for result in results:
    print(result.page_content, result.metadata)

Producto: Anillo de diamantes. Color: blanco. Precio: 200.0. Descripción: Anillo con diamantes.. {'id': 2, 'color': 'blanco', 'price': 200.0}
Producto: Anillo de diamantes. Color: blanco. Precio: 200.0. Descripción: Anillo con diamantes.. {'id': 4, 'color': 'blanco', 'price': 200.0}
Producto: Collar de perlas. Color: blanco. Precio: 50.0. Descripción: Un elegante collar de perlas.. {'id': 3, 'color': 'blanco', 'price': 50.0}


In [2]:
query = "quiero productos de color blanco"
results = vectorstore.similarity_search(query, k=3)

# # Filtrar por metadatos si necesitas obtener productos de un color específico
# filtered_results = [result for result in results if "blanco" in result.metadata.get("color", "")]
# print(filtered_results)


In [3]:
results

[Document(metadata={'color': 'blanco', 'id': 2, 'price': 200.0}, page_content='Anillo de diamantes'),
 Document(metadata={'color': 'blanco', 'id': 3, 'price': 50.0}, page_content='Collar de perlas'),
 Document(metadata={'color': 'rosa', 'id': 1, 'price': 50.0}, page_content='Collar de perlas')]

In [8]:
retriever = vectorstore.as_retriever(
    search_type="mmr", search_kwargs={"k": 3, "fetch_k": 5}
)
retriever.invoke("quiero productos de color blanco")

[Document(metadata={'color': 'blanco', 'id': 2, 'price': 200.0}, page_content='Anillo de diamantes'),
 Document(metadata={'color': 'blanco', 'id': 2, 'price': 200.0}, page_content='Anillo de diamantes'),
 Document(metadata={'color': 'rosa', 'id': 1, 'price': 50.0}, page_content='Collar de perlas')]

In [12]:
total_documents = vectorstore._collection.get()
total_documents.keys()

dict_keys(['ids', 'embeddings', 'documents', 'uris', 'data', 'metadatas', 'included'])

In [16]:
len(total_documents["documents"])

7

# Chatbot

In [6]:
import os
from langchain_openai import AzureChatOpenAI
from dotenv import load_dotenv

load_dotenv()

True

In [7]:
model = AzureChatOpenAI(
    azure_endpoint=os.getenv("OPENAI_ENDPOINT"),
    api_key=os.getenv("OPENAI_API_KEY"),
    deployment_name=os.getenv("OPENAI_DEPLOIMENT_MODEL"),
    api_version=os.getenv("OPENAI_API_VERSION")
)



In [8]:
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

prompt = ChatPromptTemplate.from_messages(
    [
        SystemMessage(
            content="You are a helpful assistant. Answer all questions to the best of your ability."
        ),
        MessagesPlaceholder(variable_name="messages"),
    ]
)

chain = prompt | model

ai_msg = chain.invoke(
    {
        "messages": [
            HumanMessage(
                content="Translate from English to French: I love programming."
            ),
            AIMessage(content="J'adore la programmation."),
            HumanMessage(content="What did you just say?"),
        ],
    }
)
print(ai_msg.content)

I said "J'adore la programmation," which translates to "I love programming" in English.


In [36]:
from langchain_core.messages import trim_messages
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import START, MessagesState, StateGraph

# Define trimmer
# count each message as 1 "token" (token_counter=len) and keep only the last two messages
trimmer = trim_messages(strategy="last", max_tokens=2, token_counter=len)

workflow = StateGraph(state_schema=MessagesState)


# Define the function that calls the model
def call_model(state: MessagesState):
    trimmed_messages = trimmer.invoke(state["messages"])
    system_prompt = (
        "You are a helpful assistant. "
        "Answer all questions to the best of your ability."
    )
    messages = [SystemMessage(content=system_prompt)] + trimmed_messages
    response = model.invoke(messages)
    return {"messages": response}


# Define the node and edge
workflow.add_node("model", call_model)
workflow.add_edge(START, "model")

# Add simple in-memory checkpointer
memory = MemorySaver()
app = workflow.compile(checkpointer=memory)

In [37]:
app.invoke(
    {"messages": [HumanMessage(content="Translate to French: I love programming.")]},
    config={"configurable": {"thread_id": "1"}},
)

{'messages': [HumanMessage(content='Translate to French: I love programming.', additional_kwargs={}, response_metadata={}, id='bb2071ba-1d01-4af6-a04c-c251054d02c0'),
  AIMessage(content="J'aime programmer.", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 4, 'prompt_tokens': 35, 'total_tokens': 39, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_5154047bf2', 'prompt_filter_results': [{'prompt_index': 0, 'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 'jailbreak': {'filtered': False, 'detected': False}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}], 'finish_reason': 'stop', 'logprob

In [1]:
from langchain_core.messages import HumanMessage, RemoveMessage
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import START, MessagesState, StateGraph

workflow = StateGraph(state_schema=MessagesState)


# Define the function that calls the model
def call_model(state: MessagesState):
    system_prompt = (
        "You are a helpful assistant. "
        "Answer all questions to the best of your ability. "
        "The provided chat history includes a summary of the earlier conversation."
    )
    system_message = SystemMessage(content=system_prompt)
    message_history = state["messages"][:-1]  # exclude the most recent user input
    # Summarize the messages if the chat history reaches a certain size
    if len(message_history) >= 4:
        last_human_message = state["messages"][-1]
        # Invoke the model to generate conversation summary
        summary_prompt = (
            "Distill the above chat messages into a single summary message. "
            "Include as many specific details as you can."
        )
        summary_message = model.invoke(
            message_history + [HumanMessage(content=summary_prompt)]
        )

        # Delete messages that we no longer want to show up
        delete_messages = [RemoveMessage(id=m.id) for m in state["messages"]]
        # Re-add user message
        human_message = HumanMessage(content=last_human_message.content)
        # Call the model with summary & response
        response = model.invoke([system_message, summary_message, human_message])
        message_updates = [summary_message, human_message, response] + delete_messages
    else:
        message_updates = model.invoke([system_message] + state["messages"])

    return {"messages": message_updates}


# Define the node and edge
workflow.add_node("model", call_model)
workflow.add_edge(START, "model")

# Add simple in-memory checkpointer
memory = MemorySaver()
app = workflow.compile(checkpointer=memory)

In [13]:
a = app.invoke(
    {"messages": [HumanMessage(content="Holaa")]},
    config={"configurable": {"thread_id": "1"}},
)

a["messages"][-1].content

state: [AIMessage(content='The user repeatedly asked, "What did I just ask you?" three times in succession, prompting a response that acknowledged their inquiry.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 26, 'prompt_tokens': 76, 'total_tokens': 102, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_5154047bf2', 'prompt_filter_results': [{'prompt_index': 0, 'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 'jailbreak': {'filtered': False, 'detected': False}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}], 'finish_reason': 'stop', 'logprobs': None, 'content_filter_results': {'hate'

'¡Hola! ¿Cómo puedo ayudarte hoy?'

In [28]:
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.chat_models import AzureChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.embeddings import GPT4AllEmbeddings
from langchain_chroma import Chroma

# Crear el objeto de embeddings
embedding_function = GPT4AllEmbeddings()

# Cargar la base de datos existente
persist_directory = "./data/chroma_langchain_db"
vectorstore = Chroma(collection_name="products", 
                     embedding_function=embedding_function, 
                     persist_directory=persist_directory)

# Configurar el retriever
retriever = vectorstore.as_retriever(search_type="mmr", search_kwargs={"k": 5, "fetch_k": len(vectorstore._collection.get()["documents"])})

# Crear la memoria
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True, input_key="question")

# Crear el prompt
prompt = PromptTemplate(
    input_variables=["context", "chat_history", "question"],
    template=(
        "You are a helpful assistant. Use the context below to answer the question."
        "\n\nChat History:\n{chat_history}\n\nContext:\n{context}\n\nQuestion:\n{question}\n\nAnswer:"
    )
)

# Crear la cadena RetrievalQA con memoria
qa_chain = RetrievalQA.from_chain_type(
    llm=model,
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt": prompt, "memory": memory}
)

# Usar la cadena con memoria
query_1 = "cual ha sido mi última pregunta?"
result_1 = qa_chain({"query": query_1})
print("Answer 1:", result_1["result"])



Answer 1: No has hecho ninguna pregunta anterior en este chat.


In [9]:
from langchain.schema import Document
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
from langchain.embeddings import GPT4AllEmbeddings
from langchain_chroma import Chroma

# Crear el objeto de embeddings (esto depende de tu configuración exacta)
# Asegúrate de reemplazar GPT4AllEmbeddings con la correcta si es necesario
embedding_function = GPT4AllEmbeddings()

# Cargar la base de datos existente
persist_directory = "./data/chroma_langchain_db"
vectorstore = Chroma(
    collection_name="products",
    embedding_function=embedding_function,
    persist_directory=persist_directory
)

# Configurar el retriever
retriever = vectorstore.as_retriever(search_type="mmr", search_kwargs={"k": 5})

# Formatear la entrada para el modelo
def format_input_to_model(query, retrieved_docs):
    """Formatear el input del modelo con el prompt inicial y los documentos recuperados."""
    system_prompt = "You are a helpful assistant. Use the following context to answer the question."
    context = "\n\n".join(retrieved_docs)
    final_prompt = f"{system_prompt}\n\nContext:\n{context}\n\nQuestion: {query}"
    return [SystemMessage(content=system_prompt), HumanMessage(content=final_prompt)]

# Modelo simulado para generar una respuesta
def dummy_model(messages):
    """Simular la respuesta del modelo."""
    system_message, human_message = messages
    response_content = f"Simulated response based on context: {human_message.content}"
    return AIMessage(content=response_content)

# Pipeline manual
def run_pipeline(query):
    # Recuperar documentos
    retrieved_docs = [doc.page_content for doc in retriever.get_relevant_documents(query)]
    # Formatear entrada para el modelo
    messages = format_input_to_model(query, retrieved_docs)
    # Llamar al modelo
    response = dummy_model(messages)
    return response

# Ejecutar el pipeline
query = "qué te pregunte antes?"
result = run_pipeline(query)

# Imprimir respuesta final
print(result)


Number of requested results 20 is greater than number of elements in index 11, updating n_results = 11


content='Simulated response based on context: You are a helpful assistant. Use the following context to answer the question.\n\nContext:\nAnillo de diamantes\n\nAnillo de diamantes\n\nAnillo de diamantes\n\nAnillo de diamantes\n\nCollar de perlas\n\nQuestion: qué te pregunte antes?' additional_kwargs={} response_metadata={}


In [None]:
from langchain.schema import Document
from langchain.chains import RunnableMap, RunnablePassthrough
from langchain.chains.base import Runnable
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage

# 1. Definir el retriever personalizado
class CustomRetriever(Runnable):
    def __init__(self, vectorstore):
        self.vectorstore = vectorstore

    def invoke(self, query):
        # Lógica de recuperación basada en la función proporcionada
        all_doc = self.vectorstore._collection.get()["documents"]
        total_len_doc = len(all_doc)
        
        # Recuperar documentos usando el vectorstore
        retriever = self.vectorstore.as_retriever(
            search_type="mmr", search_kwargs={"k": 10, "fetch_k": total_len_doc}
        )
        result = retriever.invoke(query)

        # Procesar los resultados
        output_ids = []
        output_text = []
        for doc in result:
            if doc.metadata["id"] not in output_ids:
                output_ids.append(doc.metadata["id"])
                output_text.append(doc.page_content)

        return output_text

# 2. Formatear la entrada para el modelo
def format_input_to_model(inputs):
    """Formatear el input del modelo con el prompt inicial y los documentos recuperados."""
    query = inputs["query"]
    retrieved_docs = inputs["retrieved_docs"]

    # Crear el prompt del sistema
    system_prompt = (
        "You are a helpful assistant. Use the following context to answer the question."
    )
    
    # Crear contexto con los documentos recuperados
    context = "\n\n".join(retrieved_docs)
    
    # Formatear el prompt final
    final_prompt = f"{system_prompt}\n\nContext:\n{context}\n\nQuestion: {query}"

    return [SystemMessage(content=system_prompt), HumanMessage(content=final_prompt)]

# 3. Supongamos que el modelo es un LLM ya cargado, como OpenAI, o un modelo previamente inicializado
class DummyModel(Runnable):
    def invoke(self, inputs):
        # Aquí simulamos la respuesta del modelo.
        query = inputs['query']
        context = inputs['context']
        return AIMessage(content=f"Answer based on context: {context} \n\nAnswer: We recommend considering the following aspects...")

# 4. Crear el flujo del pipeline
vectorstore = ...  # Tu instancia de vectorstore aquí
retriever = CustomRetriever(vectorstore)  # Instancia del retriever personalizado

# 5. Definir las etapas del pipeline
pipeline = RunnableMap({
    "retrieved_docs": retriever,  # Etapa de recuperación de documentos
    "query": RunnablePassthrough()  # Pasar la query directamente como entrada
}) | RunnableMap({
    "messages": format_input_to_model  # Formatear entrada para el modelo
}) | DummyModel()  # Llamada al modelo (esto lo reemplazarías con tu modelo real)

# 6. Ejecutar el pipeline
query = "What are the benefits of pink products?"
result = pipeline.invoke({"query": query})

# 7. Imprimir respuesta final
print(result.content)


In [1]:
from rag_manager import RAGManager

rag_mngr = RAGManager()

In [3]:
rag_mngr.retrieve_data("quiero los productos para el cuello")

total_len_doc:4
{"id": "producto", "color": "color", "precio": "precio"}
<class 'str'>
{'id': 'producto', 'color': 'color', 'precio': 'precio'}
<class 'dict'>
filtros: {'id': 'producto', 'color': 'color', 'precio': 'precio'}
[]


([], [])

In [3]:
rag_mngr.chatbot("qué te he preguntado anteriormente", 1)

total_len_doc:4
{}
<class 'str'>
{}
<class 'dict'>
filtros: {}
[Document(id='54c4b021-1f68-486e-88dd-5a35c61d9ea2', metadata={'id': 2, 'color': 'blanco', 'price': 200.0}, page_content='Producto: Anillo de diamantes. Color: blanco. Precio: 200.0. Descripción: Anillo con diamantes..'), Document(id='831df401-1741-4ce0-bb94-218995cc4d9a', metadata={'id': 4, 'color': 'blanco', 'price': 200.0}, page_content='Producto: Anillo de diamantes. Color: blanco. Precio: 200.0. Descripción: Anillo con diamantes..'), Document(id='12da9b5d-53d2-4218-8be9-a9c87e5c2d4e', metadata={'id': 3, 'color': 'blanco', 'price': 50.0}, page_content='Producto: Collar de perlas. Color: blanco. Precio: 50.0. Descripción: Un elegante collar de perlas..'), Document(id='aa77003b-fe15-43bd-abbb-a89d06e9019e', metadata={'id': 1, 'color': 'rosa', 'price': 50.0}, page_content='Producto: Collar de perlas. Color: rosa. Precio: 50.0. Descripción: Un elegante collar de perlas..')]
output_text: ['Producto: Anillo de diamantes. Colo

'Anteriormente me preguntaste por productos para el cuello y de color blanco. Te proporcioné información sobre el "Collar de perlas" que es blanco y adecuado para el cuello. Si necesitas más ayuda o información, no dudes en preguntar.'