In [1]:
import warnings
warnings.filterwarnings("ignore")


## RAG

* Embedding model and LLM

In [4]:
import os
import boto3
from dotenv import load_dotenv
load_dotenv(override=True)

region_name = os.getenv("AWS_LLM_REGION_NAME")
model_id = os.getenv("AWS_LLM_MODEL_ID") 
embedding_model_id = os.getenv("AWS_EMBEDDING_MODEL_ID")

#Create the Bedrock client and the Bedrock LLM
client = boto3.client(service_name="bedrock-runtime", region_name=region_name)


In [6]:
from langchain_aws import ChatBedrockConverse


bedrock_llm=  ChatBedrockConverse(
                model= model_id ,
                temperature=0,
                max_tokens=None,
                client=client)

In [7]:
from langchain_aws.embeddings import BedrockEmbeddings
embedder = BedrockEmbeddings(client=client,
                                model_id=embedding_model_id,
                                region_name=region_name)

* Vector store

In [8]:
from source.src.utils.faiss_m import FAISS_M

MAR2_PATH_INDEX='../app/scenarios/index-mar2/index/FAISS_amazon.titan-embed-text-v1_chunk_size_500_overlap_50'
mar2_index = FAISS_M.load_local(MAR2_PATH_INDEX, embedder, allow_dangerous_deserialization=True)

In [11]:
base_retriever = mar2_index.as_retriever()
base_retriever.search_kwargs["k"]=3

* Rag 

In [13]:
from langchain.prompts.chat import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain.output_parsers.json import SimpleJsonOutputParser
from langchain_core.output_parsers import StrOutputParser

system_template = """You are an assistant for question-answering tasks. 
Use the following pieces of retrieved context to answer the question. 
If you don't know the answer, just say that you don't know. 
Use three sentences maximum and keep the answer concise.

Question: {question} 

Context: {context} 

Answer:"""

user_template = "{question}"

rag_prompt = ChatPromptTemplate.from_messages([
            ("system",system_template),
            ("user",user_template)
        ])

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)



rag_chain = (
    {"context": base_retriever | format_docs, "question": RunnablePassthrough()}
    | rag_prompt
    | bedrock_llm
    | StrOutputParser()
)

rag_chain.invoke("¿Qué trata el escenario Queue ?")

'El escenario Queue trata sobre el servicio Amazon Simple Queue Service (SQS) de AWS. SQS es un servicio de cola de mensajes que permite enviar, almacenar y recibir mensajes de forma asíncrona entre componentes de software distribuidos.\n\nAlgunas características clave del escenario Queue con SQS son:\n\n1) Permite desacoplar y escalar microservicios y aplicaciones sin servidor enviando mensajes a una cola en lugar de invocaciones directas.\n\n2) Explica el ciclo de vida de los mensajes en SQS, donde un productor envía mensajes, un consumidor los recibe y procesa, y luego los elimina de la cola.\n\n3) Menciona consideraciones importantes como el tiempo de visibilidad de los mensajes, reintentos con MaxReceiveCount y gestión de errores moviendo mensajes a una cola de mensajes fallidos.\n\nEn resumen, el escenario Queue cubre el uso del servicio SQS de AWS como mecanismo de comunicación asíncrona y desacoplada entre componentes distribuidos mediante colas de mensajes.'

## Trulens

* Bedrock llm - trulens lib

In [14]:
from trulens.providers.bedrock import Bedrock

bedrock = Bedrock(model_id=model_id, region_name=region_name)


* The RAG triad is made up of 3 evaluations: 
    - context relevance
    - groundedness
    - answer relevance. 

Satisfactory evaluations on each provides us confidence that our LLM app is free from hallucination.

* Instrument chain for logging with TruLens

In [15]:
from trulens.core import TruSession

session = TruSession()
session.reset_database()

🦑 TruSession initialized with db url sqlite:///default.sqlite .
🛑 Secret keys may be written to the database. See the `database_redact_keys` option of `TruSession` to prevent this.


* Initialize Feedback Function(s)

In [16]:
from trulens.core import Feedback
import numpy as np

from trulens.apps.langchain import TruChain


context = TruChain.select_context(rag_chain)
# Context relevance between question and each context chunk.
f_context_relevance = (
    Feedback(
        bedrock.context_relevance_with_cot_reasons, name="Context Relevance"
    )
    .on_input()
    .on(context.collect())
    .aggregate(np.mean)  # choose a different aggregation method if you wish
)

✅ In Context Relevance, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In Context Relevance, input context will be set to __record__.app.first.steps__.context.first.invoke.rets[:].page_content.collect() .


In [17]:
# Question/answer relevance between overall question and answer.
f_answer_relevance = Feedback(
    bedrock.relevance_with_cot_reasons, name="Answer Relevance"
).on_input_output()

✅ In Answer Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Answer Relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .


In [18]:
# Define a groundedness feedback function
f_groundedness = (
    Feedback(
        bedrock.groundedness_measure_with_cot_reasons, name="Groundedness"
    )
    .on(context.collect())
    .on_output()
)

✅ In Groundedness, input source will be set to __record__.app.first.steps__.context.first.invoke.rets[:].page_content.collect() .
✅ In Groundedness, input statement will be set to __record__.main_output or `Select.RecordOutput` .


In [19]:

tru_recorder = TruChain(
    rag_chain,
    app_name="ChatApplication",
    app_version="Chain1",
    feedbacks=[f_answer_relevance, f_context_relevance, f_groundedness]
)

In [20]:
questions = [
    "¿Qué trata el escenario Queue ?",
    "¿Cual es la raiz cuadrada de pi?",
]
with tru_recorder as recording:
    for question in questions:
        llm_response = rag_chain.invoke(question)



* Explore results in the notebook

In [21]:
session.get_records_and_feedback()[0]

Unnamed: 0,app_id,app_json,type,record_id,input,output,tags,record_json,cost_json,perf_json,ts,app_name,app_version,latency,total_tokens,total_cost,cost_currency
0,app_hash_8b39eafded6d3f261511a7e6bd65cb42,"{""tru_class_info"": {""name"": ""TruChain"", ""modul...",RunnableSequence(langchain_core.runnables.base),record_hash_7e1d31e0b7db4b3730a0ac6ae974a972,"""\u00bfQu\u00e9 trata el escenario Queue ?""","""El escenario Queue trata sobre el servicio Am...",-,"{""record_id"": ""record_hash_7e1d31e0b7db4b3730a...","{""n_requests"": 0, ""n_successful_requests"": 0, ...","{""start_time"": ""2024-09-22T11:14:58.773207"", ""...",2024-09-22T11:15:05.472035,ChatApplication,Chain1,6,0,0.0,USD
1,app_hash_8b39eafded6d3f261511a7e6bd65cb42,"{""tru_class_info"": {""name"": ""TruChain"", ""modul...",RunnableSequence(langchain_core.runnables.base),record_hash_a32026b96df0079fd1e88b242a5bce1f,"""\u00bfCual es la raiz cuadrada de pi?""","""Desafortunadamente, la ra\u00edz cuadrada de ...",-,"{""record_id"": ""record_hash_a32026b96df0079fd1e...","{""n_requests"": 0, ""n_successful_requests"": 0, ...","{""start_time"": ""2024-09-22T11:15:05.675275"", ""...",2024-09-22T11:15:09.159283,ChatApplication,Chain1,3,0,0.0,USD


In [22]:
df = session.get_records_and_feedback()[0]
df.head()

Unnamed: 0,app_id,app_json,type,record_id,input,output,tags,record_json,cost_json,perf_json,...,Answer Relevance_calls,Context Relevance_calls,Answer Relevance feedback cost in USD,Context Relevance feedback cost in USD,app_name,app_version,latency,total_tokens,total_cost,cost_currency
0,app_hash_8b39eafded6d3f261511a7e6bd65cb42,"{""tru_class_info"": {""name"": ""TruChain"", ""modul...",RunnableSequence(langchain_core.runnables.base),record_hash_7e1d31e0b7db4b3730a0ac6ae974a972,"""\u00bfQu\u00e9 trata el escenario Queue ?""","""El escenario Queue trata sobre el servicio Am...",-,"{""record_id"": ""record_hash_7e1d31e0b7db4b3730a...","{""n_requests"": 0, ""n_successful_requests"": 0, ...","{""start_time"": ""2024-09-22T11:14:58.773207"", ""...",...,[{'args': {'prompt': '¿Qué trata el escenario ...,[{'args': {'question': '¿Qué trata el escenari...,0.0,0.0,ChatApplication,Chain1,6,0,0.0,USD
1,app_hash_8b39eafded6d3f261511a7e6bd65cb42,"{""tru_class_info"": {""name"": ""TruChain"", ""modul...",RunnableSequence(langchain_core.runnables.base),record_hash_a32026b96df0079fd1e88b242a5bce1f,"""\u00bfCual es la raiz cuadrada de pi?""","""Desafortunadamente, la ra\u00edz cuadrada de ...",-,"{""record_id"": ""record_hash_a32026b96df0079fd1e...","{""n_requests"": 0, ""n_successful_requests"": 0, ...","{""start_time"": ""2024-09-22T11:15:05.675275"", ""...",...,,,,,ChatApplication,Chain1,3,0,0.0,USD


*  Explore in a Dashboard

In [28]:
session.get_leaderboard(app_ids=[tru_recorder.app_id])

Unnamed: 0_level_0,Unnamed: 1_level_0,Groundedness,Answer Relevance,Context Relevance,latency,total_cost
app_name,app_version,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ChatApplication,Chain1,0.5,0.666667,0.5,4.5,0.0
